1// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\ 2// RUN: -target-cpu swift -fallow-half-arguments-and-returns -ffreestanding -emit-llvm -o - %s \ 3// RUN: | opt -S -mem2reg | FileCheck %s 4 5// REQUIRES: long-tests 6 7#include <arm_neon.h> 8 9// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 10// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4 11// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]] 12// CHECK: ret <8 x i8> [[ADD_I]] 13int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) { 14 return vaba_s8(a, b, c); 15} 16 17// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 18// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 19// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 20// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 21// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 22// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 23// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> 24// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> 25// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]] 26// CHECK: ret <4 x i16> [[ADD_I]] 27int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) { 28 return vaba_s16(a, b, c); 29} 30 31// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 32// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 33// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 34// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 35// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 36// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 37// CHECK: 
[[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> 38// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> 39// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]] 40// CHECK: ret <2 x i32> [[ADD_I]] 41int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) { 42 return vaba_s32(a, b, c); 43} 44 45// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 46// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4 47// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]] 48// CHECK: ret <8 x i8> [[ADD_I]] 49uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { 50 return vaba_u8(a, b, c); 51} 52 53// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 54// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 55// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 56// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 57// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 58// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 59// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> 60// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> 61// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]] 62// CHECK: ret <4 x i16> [[ADD_I]] 63uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { 64 return vaba_u16(a, b, c); 65} 66 67// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 68// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 69// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 70// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 71// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 72// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> 
@llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 73// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> 74// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> 75// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]] 76// CHECK: ret <2 x i32> [[ADD_I]] 77uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { 78 return vaba_u32(a, b, c); 79} 80 81// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 82// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c) #4 83// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]] 84// CHECK: ret <16 x i8> [[ADD_I]] 85int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { 86 return vabaq_s8(a, b, c); 87} 88 89// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 90// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 91// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8> 92// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 93// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 94// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4 95// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8> 96// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16> 97// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]] 98// CHECK: ret <8 x i16> [[ADD_I]] 99int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { 100 return vabaq_s16(a, b, c); 101} 102 103// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 104// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 105// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8> 106// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 
107// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 108// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4 109// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8> 110// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32> 111// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]] 112// CHECK: ret <4 x i32> [[ADD_I]] 113int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { 114 return vabaq_s32(a, b, c); 115} 116 117// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 118// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c) #4 119// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]] 120// CHECK: ret <16 x i8> [[ADD_I]] 121uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { 122 return vabaq_u8(a, b, c); 123} 124 125// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 126// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 127// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8> 128// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 129// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 130// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4 131// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8> 132// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16> 133// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]] 134// CHECK: ret <8 x i16> [[ADD_I]] 135uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { 136 return vabaq_u16(a, b, c); 137} 138 139// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 140// CHECK: [[TMP0:%.*]] = 
bitcast <4 x i32> %b to <16 x i8> 141// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8> 142// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 143// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 144// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4 145// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8> 146// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32> 147// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]] 148// CHECK: ret <4 x i32> [[ADD_I]] 149uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { 150 return vabaq_u32(a, b, c); 151} 152 153 154// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 155// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4 156// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16> 157// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] 158// CHECK: ret <8 x i16> [[ADD_I]] 159int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { 160 return vabal_s8(a, b, c); 161} 162 163// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 164// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 165// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 166// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 167// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 168// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4 169// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> 170// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16> 171// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> 172// CHECK: 
[[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> 173// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> 174// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] 175// CHECK: ret <4 x i32> [[ADD_I]] 176int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 177 return vabal_s16(a, b, c); 178} 179 180// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 181// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 182// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 183// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 184// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 185// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4 186// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> 187// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32> 188// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> 189// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 190// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> 191// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] 192// CHECK: ret <2 x i64> [[ADD_I]] 193int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 194 return vabal_s32(a, b, c); 195} 196 197// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 198// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4 199// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16> 200// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] 201// CHECK: ret <8 x i16> [[ADD_I]] 202uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { 203 return vabal_u8(a, b, c); 204} 205 206// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x 
i16> %c) #0 { 207// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 208// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 209// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 210// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 211// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4 212// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> 213// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16> 214// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> 215// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> 216// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> 217// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] 218// CHECK: ret <4 x i32> [[ADD_I]] 219uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { 220 return vabal_u16(a, b, c); 221} 222 223// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 224// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 225// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 226// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 227// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 228// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4 229// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> 230// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32> 231// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> 232// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 233// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> 234// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] 235// CHECK: ret <2 x i64> [[ADD_I]] 236uint64x2_t 
test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { 237 return vabal_u32(a, b, c); 238} 239 240 241// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %a, <8 x i8> %b) #0 { 242// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 243// CHECK: ret <8 x i8> [[VABD_V_I]] 244int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) { 245 return vabd_s8(a, b); 246} 247 248// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %a, <4 x i16> %b) #0 { 249// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 250// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 251// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 252// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 253// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4 254// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8> 255// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16> 256// CHECK: ret <4 x i16> [[TMP2]] 257int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) { 258 return vabd_s16(a, b); 259} 260 261// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %a, <2 x i32> %b) #0 { 262// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 263// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 264// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 265// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 266// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4 267// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8> 268// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32> 269// CHECK: ret <2 x i32> [[TMP2]] 270int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) { 271 return vabd_s32(a, b); 272} 273 274// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %a, <8 x i8> %b) #0 { 275// 
CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 276// CHECK: ret <8 x i8> [[VABD_V_I]] 277uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) { 278 return vabd_u8(a, b); 279} 280 281// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %a, <4 x i16> %b) #0 { 282// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 283// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 284// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 285// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 286// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4 287// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8> 288// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16> 289// CHECK: ret <4 x i16> [[TMP2]] 290uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) { 291 return vabd_u16(a, b); 292} 293 294// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 295// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 296// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 297// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 298// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 299// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4 300// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8> 301// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32> 302// CHECK: ret <2 x i32> [[TMP2]] 303uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) { 304 return vabd_u32(a, b); 305} 306 307// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %a, <2 x float> %b) #0 { 308// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 309// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 310// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> 
[[TMP0]] to <2 x float> 311// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 312// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> [[VABD_V_I]], <2 x float> [[VABD_V1_I]]) #4 313// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8> 314// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x float> 315// CHECK: ret <2 x float> [[TMP2]] 316float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) { 317 return vabd_f32(a, b); 318} 319 320// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 321// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 322// CHECK: ret <16 x i8> [[VABDQ_V_I]] 323int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) { 324 return vabdq_s8(a, b); 325} 326 327// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 328// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 329// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 330// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 331// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 332// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4 333// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8> 334// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16> 335// CHECK: ret <8 x i16> [[TMP2]] 336int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) { 337 return vabdq_s16(a, b); 338} 339 340// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 341// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 342// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 343// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 344// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 345// CHECK: [[VABDQ_V2_I:%.*]] = 
call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4 346// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8> 347// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32> 348// CHECK: ret <4 x i32> [[TMP2]] 349int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) { 350 return vabdq_s32(a, b); 351} 352 353// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 354// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 355// CHECK: ret <16 x i8> [[VABDQ_V_I]] 356uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) { 357 return vabdq_u8(a, b); 358} 359 360// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 361// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 362// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 363// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 364// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 365// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4 366// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8> 367// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16> 368// CHECK: ret <8 x i16> [[TMP2]] 369uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) { 370 return vabdq_u16(a, b); 371} 372 373// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 374// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 375// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 376// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 377// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 378// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4 379// CHECK: [[VABDQ_V3_I:%.*]] = 
bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8> 380// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32> 381// CHECK: ret <4 x i32> [[TMP2]] 382uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) { 383 return vabdq_u32(a, b); 384} 385 386// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) #0 { 387// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 388// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 389// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 390// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 391// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> [[VABDQ_V_I]], <4 x float> [[VABDQ_V1_I]]) #4 392// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8> 393// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x float> 394// CHECK: ret <4 x float> [[TMP2]] 395float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) { 396 return vabdq_f32(a, b); 397} 398 399 400// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 401// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 402// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16> 403// CHECK: ret <8 x i16> [[VMOVL_I_I]] 404int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { 405 return vabdl_s8(a, b); 406} 407 408// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 409// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 410// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 411// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 412// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 413// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 414// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> 
[[VABD_V2_I_I]] to <8 x i8> 415// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> 416// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> 417// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> 418// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> 419// CHECK: ret <4 x i32> [[VMOVL_I_I]] 420int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { 421 return vabdl_s16(a, b); 422} 423 424// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 425// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 426// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 427// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 428// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 429// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 430// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> 431// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> 432// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> 433// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 434// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> 435// CHECK: ret <2 x i64> [[VMOVL_I_I]] 436int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { 437 return vabdl_s32(a, b); 438} 439 440// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 441// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 442// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16> 443// CHECK: ret <8 x i16> [[VMOVL_I_I]] 444uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { 445 return vabdl_u8(a, b); 446} 447 448// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 449// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 450// CHECK: [[TMP1:%.*]] = bitcast <4 x 
i16> %b to <8 x i8> 451// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 452// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 453// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 454// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> 455// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> 456// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> 457// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> 458// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> 459// CHECK: ret <4 x i32> [[VMOVL_I_I]] 460uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { 461 return vabdl_u16(a, b); 462} 463 464// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 465// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 466// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 467// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 468// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 469// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 470// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> 471// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> 472// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> 473// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 474// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> 475// CHECK: ret <2 x i64> [[VMOVL_I_I]] 476uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { 477 return vabdl_u32(a, b); 478} 479 480 481// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 { 482// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4 483// CHECK: ret <8 x i8> [[VABS_I]] 484int8x8_t 
test_vabs_s8(int8x8_t a) { 485 return vabs_s8(a); 486} 487 488// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 { 489// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 490// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 491// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> [[VABS_I]]) #4 492// CHECK: ret <4 x i16> [[VABS1_I]] 493int16x4_t test_vabs_s16(int16x4_t a) { 494 return vabs_s16(a); 495} 496 497// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 { 498// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 499// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 500// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> [[VABS_I]]) #4 501// CHECK: ret <2 x i32> [[VABS1_I]] 502int32x2_t test_vabs_s32(int32x2_t a) { 503 return vabs_s32(a); 504} 505 506// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 { 507// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 508// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 509// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #4 510// CHECK: ret <2 x float> [[VABS1_I]] 511float32x2_t test_vabs_f32(float32x2_t a) { 512 return vabs_f32(a); 513} 514 515// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 { 516// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4 517// CHECK: ret <16 x i8> [[VABS_I]] 518int8x16_t test_vabsq_s8(int8x16_t a) { 519 return vabsq_s8(a); 520} 521 522// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 { 523// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 524// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 525// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> [[VABS_I]]) #4 526// CHECK: ret <8 x i16> [[VABS1_I]] 527int16x8_t test_vabsq_s16(int16x8_t a) { 528 return vabsq_s16(a); 529} 530 531// 
CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 { 532// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 533// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 534// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> [[VABS_I]]) #4 535// CHECK: ret <4 x i32> [[VABS1_I]] 536int32x4_t test_vabsq_s32(int32x4_t a) { 537 return vabsq_s32(a); 538} 539 540// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 { 541// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 542// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 543// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #4 544// CHECK: ret <4 x float> [[VABS1_I]] 545float32x4_t test_vabsq_f32(float32x4_t a) { 546 return vabsq_f32(a); 547} 548 549 550// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { 551// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b 552// CHECK: ret <8 x i8> [[ADD_I]] 553int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) { 554 return vadd_s8(a, b); 555} 556 557// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { 558// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b 559// CHECK: ret <4 x i16> [[ADD_I]] 560int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) { 561 return vadd_s16(a, b); 562} 563 564// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { 565// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b 566// CHECK: ret <2 x i32> [[ADD_I]] 567int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) { 568 return vadd_s32(a, b); 569} 570 571// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %a, <1 x i64> %b) #0 { 572// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b 573// CHECK: ret <1 x i64> [[ADD_I]] 574int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) { 575 return vadd_s64(a, b); 576} 577 578// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %a, <2 x float> %b) #0 { 579// CHECK: [[ADD_I:%.*]] = fadd 
<2 x float> %a, %b 580// CHECK: ret <2 x float> [[ADD_I]] 581float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) { 582 return vadd_f32(a, b); 583} 584 585// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { 586// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b 587// CHECK: ret <8 x i8> [[ADD_I]] 588uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) { 589 return vadd_u8(a, b); 590} 591 592// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { 593// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b 594// CHECK: ret <4 x i16> [[ADD_I]] 595uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) { 596 return vadd_u16(a, b); 597} 598 599// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 600// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b 601// CHECK: ret <2 x i32> [[ADD_I]] 602uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) { 603 return vadd_u32(a, b); 604} 605 606// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %a, <1 x i64> %b) #0 { 607// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b 608// CHECK: ret <1 x i64> [[ADD_I]] 609uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) { 610 return vadd_u64(a, b); 611} 612 613// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 614// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b 615// CHECK: ret <16 x i8> [[ADD_I]] 616int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) { 617 return vaddq_s8(a, b); 618} 619 620// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 621// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b 622// CHECK: ret <8 x i16> [[ADD_I]] 623int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) { 624 return vaddq_s16(a, b); 625} 626 627// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 628// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b 629// CHECK: ret <4 x i32> [[ADD_I]] 630int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) { 631 return vaddq_s32(a, b); 632} 633 
634// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 635// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b 636// CHECK: ret <2 x i64> [[ADD_I]] 637int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) { 638 return vaddq_s64(a, b); 639} 640 641// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) #0 { 642// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b 643// CHECK: ret <4 x float> [[ADD_I]] 644float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) { 645 return vaddq_f32(a, b); 646} 647 648// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 649// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b 650// CHECK: ret <16 x i8> [[ADD_I]] 651uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) { 652 return vaddq_u8(a, b); 653} 654 655// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 656// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b 657// CHECK: ret <8 x i16> [[ADD_I]] 658uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) { 659 return vaddq_u16(a, b); 660} 661 662// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 663// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b 664// CHECK: ret <4 x i32> [[ADD_I]] 665uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) { 666 return vaddq_u32(a, b); 667} 668 669// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 670// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b 671// CHECK: ret <2 x i64> [[ADD_I]] 672uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) { 673 return vaddq_u64(a, b); 674} 675 676 677// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { 678// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 679// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 680// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 681// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 682// CHECK: [[VADDHN_I:%.*]] = 
add <8 x i16> [[TMP2]], [[TMP3]] 683// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 684// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> 685// CHECK: ret <8 x i8> [[VADDHN2_I]] 686int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { 687 return vaddhn_s16(a, b); 688} 689 690// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { 691// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 692// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 693// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 694// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 695// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 696// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16> 697// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> 698// CHECK: ret <4 x i16> [[VADDHN2_I]] 699int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { 700 return vaddhn_s32(a, b); 701} 702 703// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 704// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 705// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 706// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 707// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 708// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] 709// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32> 710// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> 711// CHECK: ret <2 x i32> [[VADDHN2_I]] 712int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { 713 return vaddhn_s64(a, b); 714} 715 716// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { 717// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 718// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 719// 
CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 720// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 721// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]] 722// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 723// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> 724// CHECK: ret <8 x i8> [[VADDHN2_I]] 725uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { 726 return vaddhn_u16(a, b); 727} 728 729// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 730// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 731// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 732// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 733// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 734// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 735// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16> 736// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> 737// CHECK: ret <4 x i16> [[VADDHN2_I]] 738uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { 739 return vaddhn_u32(a, b); 740} 741 742// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 743// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 744// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 745// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 746// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 747// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] 748// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32> 749// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> 750// CHECK: ret <2 x i32> [[VADDHN2_I]] 751uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { 752 return vaddhn_u64(a, b); 753} 754 755 756// CHECK-LABEL: define <8 x i16> 
@test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 757// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> 758// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> 759// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 760// CHECK: ret <8 x i16> [[ADD_I]] 761int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { 762 return vaddl_s8(a, b); 763} 764 765// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 766// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 767// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 768// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 769// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 770// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 771// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> 772// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 773// CHECK: ret <4 x i32> [[ADD_I]] 774int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { 775 return vaddl_s16(a, b); 776} 777 778// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 779// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 780// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 781// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 782// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 783// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 784// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> 785// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 786// CHECK: ret <2 x i64> [[ADD_I]] 787int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { 788 return vaddl_s32(a, b); 789} 790 791// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 792// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> 793// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> 794// CHECK: [[ADD_I:%.*]] = add <8 x i16> 
[[VMOVL_I_I]], [[VMOVL_I4_I]] 795// CHECK: ret <8 x i16> [[ADD_I]] 796uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { 797 return vaddl_u8(a, b); 798} 799 800// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 801// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 802// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 803// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 804// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 805// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 806// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 807// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 808// CHECK: ret <4 x i32> [[ADD_I]] 809uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { 810 return vaddl_u16(a, b); 811} 812 813// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 814// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 815// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 816// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 817// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 818// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 819// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 820// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 821// CHECK: ret <2 x i64> [[ADD_I]] 822uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { 823 return vaddl_u32(a, b); 824} 825 826 827// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 { 828// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> 829// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] 830// CHECK: ret <8 x i16> [[ADD_I]] 831int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { 832 return vaddw_s8(a, b); 833} 834 835// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 { 836// CHECK: [[TMP0:%.*]] = bitcast <4 x 
i16> %b to <8 x i8> 837// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 838// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 839// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] 840// CHECK: ret <4 x i32> [[ADD_I]] 841int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { 842 return vaddw_s16(a, b); 843} 844 845// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 { 846// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 847// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 848// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 849// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] 850// CHECK: ret <2 x i64> [[ADD_I]] 851int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { 852 return vaddw_s32(a, b); 853} 854 855// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 { 856// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> 857// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] 858// CHECK: ret <8 x i16> [[ADD_I]] 859uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { 860 return vaddw_u8(a, b); 861} 862 863// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 { 864// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 865// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 866// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 867// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] 868// CHECK: ret <4 x i32> [[ADD_I]] 869uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { 870 return vaddw_u16(a, b); 871} 872 873// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 { 874// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 875// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 876// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 877// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] 878// CHECK: ret <2 
x i64> [[ADD_I]] 879uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { 880 return vaddw_u32(a, b); 881} 882 883 884// CHECK-LABEL: define <8 x i8> @test_vand_s8(<8 x i8> %a, <8 x i8> %b) #0 { 885// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b 886// CHECK: ret <8 x i8> [[AND_I]] 887int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) { 888 return vand_s8(a, b); 889} 890 891// CHECK-LABEL: define <4 x i16> @test_vand_s16(<4 x i16> %a, <4 x i16> %b) #0 { 892// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b 893// CHECK: ret <4 x i16> [[AND_I]] 894int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) { 895 return vand_s16(a, b); 896} 897 898// CHECK-LABEL: define <2 x i32> @test_vand_s32(<2 x i32> %a, <2 x i32> %b) #0 { 899// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b 900// CHECK: ret <2 x i32> [[AND_I]] 901int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) { 902 return vand_s32(a, b); 903} 904 905// CHECK-LABEL: define <1 x i64> @test_vand_s64(<1 x i64> %a, <1 x i64> %b) #0 { 906// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b 907// CHECK: ret <1 x i64> [[AND_I]] 908int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) { 909 return vand_s64(a, b); 910} 911 912// CHECK-LABEL: define <8 x i8> @test_vand_u8(<8 x i8> %a, <8 x i8> %b) #0 { 913// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b 914// CHECK: ret <8 x i8> [[AND_I]] 915uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) { 916 return vand_u8(a, b); 917} 918 919// CHECK-LABEL: define <4 x i16> @test_vand_u16(<4 x i16> %a, <4 x i16> %b) #0 { 920// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b 921// CHECK: ret <4 x i16> [[AND_I]] 922uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) { 923 return vand_u16(a, b); 924} 925 926// CHECK-LABEL: define <2 x i32> @test_vand_u32(<2 x i32> %a, <2 x i32> %b) #0 { 927// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b 928// CHECK: ret <2 x i32> [[AND_I]] 929uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) { 930 return vand_u32(a, b); 931} 932 933// CHECK-LABEL: define <1 x i64> @test_vand_u64(<1 x i64> 
%a, <1 x i64> %b) #0 { 934// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b 935// CHECK: ret <1 x i64> [[AND_I]] 936uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) { 937 return vand_u64(a, b); 938} 939 940// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 941// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b 942// CHECK: ret <16 x i8> [[AND_I]] 943int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) { 944 return vandq_s8(a, b); 945} 946 947// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 948// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b 949// CHECK: ret <8 x i16> [[AND_I]] 950int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) { 951 return vandq_s16(a, b); 952} 953 954// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 955// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b 956// CHECK: ret <4 x i32> [[AND_I]] 957int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) { 958 return vandq_s32(a, b); 959} 960 961// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 962// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b 963// CHECK: ret <2 x i64> [[AND_I]] 964int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) { 965 return vandq_s64(a, b); 966} 967 968// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 969// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b 970// CHECK: ret <16 x i8> [[AND_I]] 971uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) { 972 return vandq_u8(a, b); 973} 974 975// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 976// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b 977// CHECK: ret <8 x i16> [[AND_I]] 978uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) { 979 return vandq_u16(a, b); 980} 981 982// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 983// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b 984// CHECK: ret <4 x i32> [[AND_I]] 985uint32x4_t 
test_vandq_u32(uint32x4_t a, uint32x4_t b) { 986 return vandq_u32(a, b); 987} 988 989// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 990// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b 991// CHECK: ret <2 x i64> [[AND_I]] 992uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) { 993 return vandq_u64(a, b); 994} 995 996 997// CHECK-LABEL: define <8 x i8> @test_vbic_s8(<8 x i8> %a, <8 x i8> %b) #0 { 998// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 999// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] 1000// CHECK: ret <8 x i8> [[AND_I]] 1001int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { 1002 return vbic_s8(a, b); 1003} 1004 1005// CHECK-LABEL: define <4 x i16> @test_vbic_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1006// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1> 1007// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] 1008// CHECK: ret <4 x i16> [[AND_I]] 1009int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { 1010 return vbic_s16(a, b); 1011} 1012 1013// CHECK-LABEL: define <2 x i32> @test_vbic_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1014// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1> 1015// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] 1016// CHECK: ret <2 x i32> [[AND_I]] 1017int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { 1018 return vbic_s32(a, b); 1019} 1020 1021// CHECK-LABEL: define <1 x i64> @test_vbic_s64(<1 x i64> %a, <1 x i64> %b) #0 { 1022// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1> 1023// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] 1024// CHECK: ret <1 x i64> [[AND_I]] 1025int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { 1026 return vbic_s64(a, b); 1027} 1028 1029// CHECK-LABEL: define <8 x i8> @test_vbic_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1030// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1031// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] 1032// 
CHECK: ret <8 x i8> [[AND_I]] 1033uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { 1034 return vbic_u8(a, b); 1035} 1036 1037// CHECK-LABEL: define <4 x i16> @test_vbic_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1038// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1> 1039// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] 1040// CHECK: ret <4 x i16> [[AND_I]] 1041uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { 1042 return vbic_u16(a, b); 1043} 1044 1045// CHECK-LABEL: define <2 x i32> @test_vbic_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1046// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1> 1047// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] 1048// CHECK: ret <2 x i32> [[AND_I]] 1049uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { 1050 return vbic_u32(a, b); 1051} 1052 1053// CHECK-LABEL: define <1 x i64> @test_vbic_u64(<1 x i64> %a, <1 x i64> %b) #0 { 1054// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1> 1055// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] 1056// CHECK: ret <1 x i64> [[AND_I]] 1057uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { 1058 return vbic_u64(a, b); 1059} 1060 1061// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1062// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1063// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] 1064// CHECK: ret <16 x i8> [[AND_I]] 1065int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { 1066 return vbicq_s8(a, b); 1067} 1068 1069// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1070// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1071// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] 1072// CHECK: ret <8 x i16> [[AND_I]] 1073int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { 1074 return vbicq_s16(a, b); 1075} 1076 1077// 
CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1078// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> 1079// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] 1080// CHECK: ret <4 x i32> [[AND_I]] 1081int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { 1082 return vbicq_s32(a, b); 1083} 1084 1085// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 1086// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1> 1087// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] 1088// CHECK: ret <2 x i64> [[AND_I]] 1089int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { 1090 return vbicq_s64(a, b); 1091} 1092 1093// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1094// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1095// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] 1096// CHECK: ret <16 x i8> [[AND_I]] 1097uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { 1098 return vbicq_u8(a, b); 1099} 1100 1101// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1102// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1103// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] 1104// CHECK: ret <8 x i16> [[AND_I]] 1105uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { 1106 return vbicq_u16(a, b); 1107} 1108 1109// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1110// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> 1111// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] 1112// CHECK: ret <4 x i32> [[AND_I]] 1113uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { 1114 return vbicq_u32(a, b); 1115} 1116 1117// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 1118// CHECK: 
[[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1> 1119// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] 1120// CHECK: ret <2 x i64> [[AND_I]] 1121uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { 1122 return vbicq_u64(a, b); 1123} 1124 1125 1126// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 1127// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 1128// CHECK: ret <8 x i8> [[VBSL_V_I]] 1129int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) { 1130 return vbsl_s8(a, b, c); 1131} 1132 1133// CHECK-LABEL: define <4 x i16> @test_vbsl_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 1134// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1135// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 1136// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 1137// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1138// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> 1139// CHECK: ret <4 x i16> [[TMP3]] 1140int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) { 1141 return vbsl_s16(a, b, c); 1142} 1143 1144// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 1145// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1146// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 1147// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 1148// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1149// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32> 1150// CHECK: ret <2 x i32> [[TMP3]] 1151int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) { 1152 return vbsl_s32(a, b, c); 1153} 1154 1155// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 { 1156// CHECK: 
[[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 1157// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 1158// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> 1159// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1160// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64> 1161// CHECK: ret <1 x i64> [[TMP3]] 1162int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) { 1163 return vbsl_s64(a, b, c); 1164} 1165 1166// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 1167// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 1168// CHECK: ret <8 x i8> [[VBSL_V_I]] 1169uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { 1170 return vbsl_u8(a, b, c); 1171} 1172 1173// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 1174// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1175// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 1176// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 1177// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1178// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> 1179// CHECK: ret <4 x i16> [[TMP3]] 1180uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { 1181 return vbsl_u16(a, b, c); 1182} 1183 1184// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 1185// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1186// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 1187// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 1188// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1189// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] 
to <2 x i32> 1190// CHECK: ret <2 x i32> [[TMP3]] 1191uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { 1192 return vbsl_u32(a, b, c); 1193} 1194 1195// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 { 1196// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 1197// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 1198// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> 1199// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1200// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64> 1201// CHECK: ret <1 x i64> [[TMP3]] 1202uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) { 1203 return vbsl_u64(a, b, c); 1204} 1205 1206// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x i32> %a, <2 x float> %b, <2 x float> %c) #0 { 1207// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1208// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1209// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> 1210// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1211// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float> 1212// CHECK: ret <2 x float> [[TMP3]] 1213float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) { 1214 return vbsl_f32(a, b, c); 1215} 1216 1217// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 1218// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 1219// CHECK: ret <8 x i8> [[VBSL_V_I]] 1220poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) { 1221 return vbsl_p8(a, b, c); 1222} 1223 1224// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 1225// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1226// CHECK: 
[[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 1227// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 1228// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1229// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> 1230// CHECK: ret <4 x i16> [[TMP3]] 1231poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) { 1232 return vbsl_p16(a, b, c); 1233} 1234 1235// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 1236// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 1237// CHECK: ret <16 x i8> [[VBSLQ_V_I]] 1238int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) { 1239 return vbslq_s8(a, b, c); 1240} 1241 1242// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 1243// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1244// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1245// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> 1246// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1247// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> 1248// CHECK: ret <8 x i16> [[TMP3]] 1249int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) { 1250 return vbslq_s16(a, b, c); 1251} 1252 1253// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 1254// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1255// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 1256// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> 1257// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1258// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32> 1259// 
CHECK: ret <4 x i32> [[TMP3]] 1260int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) { 1261 return vbslq_s32(a, b, c); 1262} 1263 1264// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 { 1265// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1266// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 1267// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> 1268// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1269// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64> 1270// CHECK: ret <2 x i64> [[TMP3]] 1271int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) { 1272 return vbslq_s64(a, b, c); 1273} 1274 1275// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 1276// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 1277// CHECK: ret <16 x i8> [[VBSLQ_V_I]] 1278uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { 1279 return vbslq_u8(a, b, c); 1280} 1281 1282// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 1283// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1284// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1285// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> 1286// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1287// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> 1288// CHECK: ret <8 x i16> [[TMP3]] 1289uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { 1290 return vbslq_u16(a, b, c); 1291} 1292 1293// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 1294// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1295// 
CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 1296// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> 1297// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1298// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32> 1299// CHECK: ret <4 x i32> [[TMP3]] 1300uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { 1301 return vbslq_u32(a, b, c); 1302} 1303 1304// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 { 1305// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1306// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 1307// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> 1308// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1309// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64> 1310// CHECK: ret <2 x i64> [[TMP3]] 1311uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) { 1312 return vbslq_u64(a, b, c); 1313} 1314 1315// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %a, <4 x float> %b, <4 x float> %c) #0 { 1316// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1317// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1318// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> 1319// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1320// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float> 1321// CHECK: ret <4 x float> [[TMP3]] 1322float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { 1323 return vbslq_f32(a, b, c); 1324} 1325 1326// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 1327// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, 
<16 x i8> %b, <16 x i8> %c) #4 1328// CHECK: ret <16 x i8> [[VBSLQ_V_I]] 1329poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) { 1330 return vbslq_p8(a, b, c); 1331} 1332 1333// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 1334// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1335// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1336// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> 1337// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1338// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> 1339// CHECK: ret <8 x i16> [[TMP3]] 1340poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) { 1341 return vbslq_p16(a, b, c); 1342} 1343 1344 1345// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %a, <2 x float> %b) #0 { 1346// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1347// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1348// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1349// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1350// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4 1351// CHECK: ret <2 x i32> [[VCAGE_V2_I]] 1352uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) { 1353 return vcage_f32(a, b); 1354} 1355 1356// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %a, <4 x float> %b) #0 { 1357// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1358// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1359// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1360// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1361// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x 
float> [[VCAGEQ_V1_I]]) #4 1362// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] 1363uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) { 1364 return vcageq_f32(a, b); 1365} 1366 1367 1368// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %a, <2 x float> %b) #0 { 1369// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1370// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1371// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1372// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1373// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4 1374// CHECK: ret <2 x i32> [[VCAGT_V2_I]] 1375uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) { 1376 return vcagt_f32(a, b); 1377} 1378 1379// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %a, <4 x float> %b) #0 { 1380// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1381// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1382// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1383// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1384// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4 1385// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] 1386uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) { 1387 return vcagtq_f32(a, b); 1388} 1389 1390 1391// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %a, <2 x float> %b) #0 { 1392// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1393// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1394// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1395// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1396// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> 
[[VCALE_V1_I]]) #4 1397// CHECK: ret <2 x i32> [[VCALE_V2_I]] 1398uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) { 1399 return vcale_f32(a, b); 1400} 1401 1402// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %a, <4 x float> %b) #0 { 1403// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1404// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1405// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1406// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1407// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4 1408// CHECK: ret <4 x i32> [[VCALEQ_V2_I]] 1409uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) { 1410 return vcaleq_f32(a, b); 1411} 1412 1413 1414// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %a, <2 x float> %b) #0 { 1415// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1416// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1417// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1418// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1419// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4 1420// CHECK: ret <2 x i32> [[VCALT_V2_I]] 1421uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) { 1422 return vcalt_f32(a, b); 1423} 1424 1425// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %a, <4 x float> %b) #0 { 1426// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1427// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1428// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1429// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1430// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> 
[[VCALTQ_V1_I]]) #4 1431// CHECK: ret <4 x i32> [[VCALTQ_V2_I]] 1432uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) { 1433 return vcaltq_f32(a, b); 1434} 1435 1436 1437// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1438// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b 1439// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1440// CHECK: ret <8 x i8> [[SEXT_I]] 1441uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) { 1442 return vceq_s8(a, b); 1443} 1444 1445// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1446// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b 1447// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1448// CHECK: ret <4 x i16> [[SEXT_I]] 1449uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) { 1450 return vceq_s16(a, b); 1451} 1452 1453// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1454// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b 1455// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1456// CHECK: ret <2 x i32> [[SEXT_I]] 1457uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) { 1458 return vceq_s32(a, b); 1459} 1460 1461// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %a, <2 x float> %b) #0 { 1462// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b 1463// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1464// CHECK: ret <2 x i32> [[SEXT_I]] 1465uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) { 1466 return vceq_f32(a, b); 1467} 1468 1469// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1470// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b 1471// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1472// CHECK: ret <8 x i8> [[SEXT_I]] 1473uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) { 1474 return vceq_u8(a, b); 1475} 1476 1477// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1478// CHECK: [[CMP_I:%.*]] = icmp eq 
<4 x i16> %a, %b 1479// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1480// CHECK: ret <4 x i16> [[SEXT_I]] 1481uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) { 1482 return vceq_u16(a, b); 1483} 1484 1485// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1486// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b 1487// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1488// CHECK: ret <2 x i32> [[SEXT_I]] 1489uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) { 1490 return vceq_u32(a, b); 1491} 1492 1493// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %a, <8 x i8> %b) #0 { 1494// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b 1495// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1496// CHECK: ret <8 x i8> [[SEXT_I]] 1497uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) { 1498 return vceq_p8(a, b); 1499} 1500 1501// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1502// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b 1503// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1504// CHECK: ret <16 x i8> [[SEXT_I]] 1505uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) { 1506 return vceqq_s8(a, b); 1507} 1508 1509// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1510// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b 1511// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1512// CHECK: ret <8 x i16> [[SEXT_I]] 1513uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) { 1514 return vceqq_s16(a, b); 1515} 1516 1517// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1518// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b 1519// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1520// CHECK: ret <4 x i32> [[SEXT_I]] 1521uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) { 1522 return vceqq_s32(a, b); 1523} 1524 1525// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x 
float> %a, <4 x float> %b) #0 { 1526// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b 1527// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1528// CHECK: ret <4 x i32> [[SEXT_I]] 1529uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) { 1530 return vceqq_f32(a, b); 1531} 1532 1533// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1534// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b 1535// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1536// CHECK: ret <16 x i8> [[SEXT_I]] 1537uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) { 1538 return vceqq_u8(a, b); 1539} 1540 1541// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1542// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b 1543// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1544// CHECK: ret <8 x i16> [[SEXT_I]] 1545uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) { 1546 return vceqq_u16(a, b); 1547} 1548 1549// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1550// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b 1551// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1552// CHECK: ret <4 x i32> [[SEXT_I]] 1553uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) { 1554 return vceqq_u32(a, b); 1555} 1556 1557// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %a, <16 x i8> %b) #0 { 1558// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b 1559// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1560// CHECK: ret <16 x i8> [[SEXT_I]] 1561uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) { 1562 return vceqq_p8(a, b); 1563} 1564 1565 1566// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1567// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b 1568// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1569// CHECK: ret <8 x i8> [[SEXT_I]] 1570uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) { 1571 return 
vcge_s8(a, b); 1572} 1573 1574// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1575// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b 1576// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1577// CHECK: ret <4 x i16> [[SEXT_I]] 1578uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) { 1579 return vcge_s16(a, b); 1580} 1581 1582// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1583// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b 1584// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1585// CHECK: ret <2 x i32> [[SEXT_I]] 1586uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) { 1587 return vcge_s32(a, b); 1588} 1589 1590// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %a, <2 x float> %b) #0 { 1591// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b 1592// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1593// CHECK: ret <2 x i32> [[SEXT_I]] 1594uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) { 1595 return vcge_f32(a, b); 1596} 1597 1598// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1599// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b 1600// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1601// CHECK: ret <8 x i8> [[SEXT_I]] 1602uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) { 1603 return vcge_u8(a, b); 1604} 1605 1606// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1607// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b 1608// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1609// CHECK: ret <4 x i16> [[SEXT_I]] 1610uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) { 1611 return vcge_u16(a, b); 1612} 1613 1614// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1615// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b 1616// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1617// CHECK: ret <2 x i32> [[SEXT_I]] 
1618uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) { 1619 return vcge_u32(a, b); 1620} 1621 1622// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1623// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b 1624// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1625// CHECK: ret <16 x i8> [[SEXT_I]] 1626uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) { 1627 return vcgeq_s8(a, b); 1628} 1629 1630// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1631// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b 1632// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1633// CHECK: ret <8 x i16> [[SEXT_I]] 1634uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) { 1635 return vcgeq_s16(a, b); 1636} 1637 1638// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1639// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b 1640// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1641// CHECK: ret <4 x i32> [[SEXT_I]] 1642uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) { 1643 return vcgeq_s32(a, b); 1644} 1645 1646// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %a, <4 x float> %b) #0 { 1647// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b 1648// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1649// CHECK: ret <4 x i32> [[SEXT_I]] 1650uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) { 1651 return vcgeq_f32(a, b); 1652} 1653 1654// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1655// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b 1656// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1657// CHECK: ret <16 x i8> [[SEXT_I]] 1658uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) { 1659 return vcgeq_u8(a, b); 1660} 1661 1662// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1663// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b 1664// CHECK: 
[[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1665// CHECK: ret <8 x i16> [[SEXT_I]] 1666uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) { 1667 return vcgeq_u16(a, b); 1668} 1669 1670// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1671// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b 1672// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1673// CHECK: ret <4 x i32> [[SEXT_I]] 1674uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) { 1675 return vcgeq_u32(a, b); 1676} 1677 1678 1679// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1680// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b 1681// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1682// CHECK: ret <8 x i8> [[SEXT_I]] 1683uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) { 1684 return vcgt_s8(a, b); 1685} 1686 1687// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1688// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b 1689// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1690// CHECK: ret <4 x i16> [[SEXT_I]] 1691uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) { 1692 return vcgt_s16(a, b); 1693} 1694 1695// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1696// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b 1697// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1698// CHECK: ret <2 x i32> [[SEXT_I]] 1699uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) { 1700 return vcgt_s32(a, b); 1701} 1702 1703// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %a, <2 x float> %b) #0 { 1704// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b 1705// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1706// CHECK: ret <2 x i32> [[SEXT_I]] 1707uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) { 1708 return vcgt_f32(a, b); 1709} 1710 1711// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %a, <8 x i8> %b) #0 
{ 1712// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b 1713// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1714// CHECK: ret <8 x i8> [[SEXT_I]] 1715uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) { 1716 return vcgt_u8(a, b); 1717} 1718 1719// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1720// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b 1721// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1722// CHECK: ret <4 x i16> [[SEXT_I]] 1723uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) { 1724 return vcgt_u16(a, b); 1725} 1726 1727// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1728// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b 1729// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1730// CHECK: ret <2 x i32> [[SEXT_I]] 1731uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) { 1732 return vcgt_u32(a, b); 1733} 1734 1735// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1736// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b 1737// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1738// CHECK: ret <16 x i8> [[SEXT_I]] 1739uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) { 1740 return vcgtq_s8(a, b); 1741} 1742 1743// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1744// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b 1745// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1746// CHECK: ret <8 x i16> [[SEXT_I]] 1747uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) { 1748 return vcgtq_s16(a, b); 1749} 1750 1751// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1752// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b 1753// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1754// CHECK: ret <4 x i32> [[SEXT_I]] 1755uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) { 1756 return vcgtq_s32(a, b); 1757} 1758 1759// 
CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %a, <4 x float> %b) #0 { 1760// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b 1761// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1762// CHECK: ret <4 x i32> [[SEXT_I]] 1763uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) { 1764 return vcgtq_f32(a, b); 1765} 1766 1767// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1768// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b 1769// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1770// CHECK: ret <16 x i8> [[SEXT_I]] 1771uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) { 1772 return vcgtq_u8(a, b); 1773} 1774 1775// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1776// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b 1777// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1778// CHECK: ret <8 x i16> [[SEXT_I]] 1779uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) { 1780 return vcgtq_u16(a, b); 1781} 1782 1783// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1784// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b 1785// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1786// CHECK: ret <4 x i32> [[SEXT_I]] 1787uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) { 1788 return vcgtq_u32(a, b); 1789} 1790 1791 1792// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1793// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b 1794// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1795// CHECK: ret <8 x i8> [[SEXT_I]] 1796uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) { 1797 return vcle_s8(a, b); 1798} 1799 1800// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1801// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b 1802// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1803// CHECK: ret <4 x i16> [[SEXT_I]] 1804uint16x4_t 
test_vcle_s16(int16x4_t a, int16x4_t b) { 1805 return vcle_s16(a, b); 1806} 1807 1808// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1809// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b 1810// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1811// CHECK: ret <2 x i32> [[SEXT_I]] 1812uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) { 1813 return vcle_s32(a, b); 1814} 1815 1816// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %a, <2 x float> %b) #0 { 1817// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b 1818// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1819// CHECK: ret <2 x i32> [[SEXT_I]] 1820uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) { 1821 return vcle_f32(a, b); 1822} 1823 1824// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1825// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b 1826// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1827// CHECK: ret <8 x i8> [[SEXT_I]] 1828uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) { 1829 return vcle_u8(a, b); 1830} 1831 1832// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1833// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b 1834// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1835// CHECK: ret <4 x i16> [[SEXT_I]] 1836uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) { 1837 return vcle_u16(a, b); 1838} 1839 1840// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1841// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b 1842// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1843// CHECK: ret <2 x i32> [[SEXT_I]] 1844uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) { 1845 return vcle_u32(a, b); 1846} 1847 1848// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1849// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b 1850// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> 
[[CMP_I]] to <16 x i8> 1851// CHECK: ret <16 x i8> [[SEXT_I]] 1852uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) { 1853 return vcleq_s8(a, b); 1854} 1855 1856// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1857// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b 1858// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1859// CHECK: ret <8 x i16> [[SEXT_I]] 1860uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) { 1861 return vcleq_s16(a, b); 1862} 1863 1864// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1865// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b 1866// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1867// CHECK: ret <4 x i32> [[SEXT_I]] 1868uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) { 1869 return vcleq_s32(a, b); 1870} 1871 1872// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %a, <4 x float> %b) #0 { 1873// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b 1874// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1875// CHECK: ret <4 x i32> [[SEXT_I]] 1876uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) { 1877 return vcleq_f32(a, b); 1878} 1879 1880// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1881// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b 1882// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1883// CHECK: ret <16 x i8> [[SEXT_I]] 1884uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) { 1885 return vcleq_u8(a, b); 1886} 1887 1888// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1889// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b 1890// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1891// CHECK: ret <8 x i16> [[SEXT_I]] 1892uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) { 1893 return vcleq_u16(a, b); 1894} 1895 1896// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1897// 
CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b 1898// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1899// CHECK: ret <4 x i32> [[SEXT_I]] 1900uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) { 1901 return vcleq_u32(a, b); 1902} 1903 1904 1905// CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { 1906// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4 1907// CHECK: ret <8 x i8> [[VCLS_V_I]] 1908int8x8_t test_vcls_s8(int8x8_t a) { 1909 return vcls_s8(a); 1910} 1911 1912// CHECK-LABEL: define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 { 1913// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1914// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1915// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> [[VCLS_V_I]]) #4 1916// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8> 1917// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16> 1918// CHECK: ret <4 x i16> [[TMP1]] 1919int16x4_t test_vcls_s16(int16x4_t a) { 1920 return vcls_s16(a); 1921} 1922 1923// CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { 1924// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1925// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1926// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> [[VCLS_V_I]]) #4 1927// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8> 1928// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32> 1929// CHECK: ret <2 x i32> [[TMP1]] 1930int32x2_t test_vcls_s32(int32x2_t a) { 1931 return vcls_s32(a); 1932} 1933 1934// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { 1935// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4 1936// CHECK: ret <16 x i8> [[VCLSQ_V_I]] 1937int8x16_t test_vclsq_s8(int8x16_t a) { 1938 return vclsq_s8(a); 1939} 1940 1941// CHECK-LABEL: define <8 x 
i16> @test_vclsq_s16(<8 x i16> %a) #0 { 1942// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1943// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1944// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #4 1945// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8> 1946// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16> 1947// CHECK: ret <8 x i16> [[TMP1]] 1948int16x8_t test_vclsq_s16(int16x8_t a) { 1949 return vclsq_s16(a); 1950} 1951 1952// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { 1953// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1954// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1955// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #4 1956// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8> 1957// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32> 1958// CHECK: ret <4 x i32> [[TMP1]] 1959int32x4_t test_vclsq_s32(int32x4_t a) { 1960 return vclsq_s32(a); 1961} 1962 1963 1964// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1965// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b 1966// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1967// CHECK: ret <8 x i8> [[SEXT_I]] 1968uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) { 1969 return vclt_s8(a, b); 1970} 1971 1972// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1973// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b 1974// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1975// CHECK: ret <4 x i16> [[SEXT_I]] 1976uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) { 1977 return vclt_s16(a, b); 1978} 1979 1980// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1981// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b 1982// CHECK: [[SEXT_I:%.*]] = sext 
<2 x i1> [[CMP_I]] to <2 x i32> 1983// CHECK: ret <2 x i32> [[SEXT_I]] 1984uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) { 1985 return vclt_s32(a, b); 1986} 1987 1988// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %a, <2 x float> %b) #0 { 1989// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b 1990// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1991// CHECK: ret <2 x i32> [[SEXT_I]] 1992uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) { 1993 return vclt_f32(a, b); 1994} 1995 1996// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1997// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b 1998// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1999// CHECK: ret <8 x i8> [[SEXT_I]] 2000uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) { 2001 return vclt_u8(a, b); 2002} 2003 2004// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %a, <4 x i16> %b) #0 { 2005// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b 2006// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2007// CHECK: ret <4 x i16> [[SEXT_I]] 2008uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) { 2009 return vclt_u16(a, b); 2010} 2011 2012// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %a, <2 x i32> %b) #0 { 2013// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b 2014// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2015// CHECK: ret <2 x i32> [[SEXT_I]] 2016uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) { 2017 return vclt_u32(a, b); 2018} 2019 2020// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 2021// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b 2022// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2023// CHECK: ret <16 x i8> [[SEXT_I]] 2024uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) { 2025 return vcltq_s8(a, b); 2026} 2027 2028// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 2029// CHECK: 
[[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b 2030// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2031// CHECK: ret <8 x i16> [[SEXT_I]] 2032uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) { 2033 return vcltq_s16(a, b); 2034} 2035 2036// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 2037// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b 2038// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2039// CHECK: ret <4 x i32> [[SEXT_I]] 2040uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) { 2041 return vcltq_s32(a, b); 2042} 2043 2044// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %a, <4 x float> %b) #0 { 2045// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b 2046// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2047// CHECK: ret <4 x i32> [[SEXT_I]] 2048uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) { 2049 return vcltq_f32(a, b); 2050} 2051 2052// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 2053// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b 2054// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2055// CHECK: ret <16 x i8> [[SEXT_I]] 2056uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) { 2057 return vcltq_u8(a, b); 2058} 2059 2060// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 2061// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b 2062// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2063// CHECK: ret <8 x i16> [[SEXT_I]] 2064uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) { 2065 return vcltq_u16(a, b); 2066} 2067 2068// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 2069// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b 2070// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2071// CHECK: ret <4 x i32> [[SEXT_I]] 2072uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) { 2073 return vcltq_u32(a, b); 2074} 2075 
2076 2077// CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { 2078// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 2079// CHECK: ret <8 x i8> [[VCLZ_V_I]] 2080int8x8_t test_vclz_s8(int8x8_t a) { 2081 return vclz_s8(a); 2082} 2083 2084// CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { 2085// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2086// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2087// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 2088// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> 2089// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> 2090// CHECK: ret <4 x i16> [[TMP1]] 2091int16x4_t test_vclz_s16(int16x4_t a) { 2092 return vclz_s16(a); 2093} 2094 2095// CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { 2096// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2097// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2098// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 2099// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> 2100// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> 2101// CHECK: ret <2 x i32> [[TMP1]] 2102int32x2_t test_vclz_s32(int32x2_t a) { 2103 return vclz_s32(a); 2104} 2105 2106// CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 { 2107// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 2108// CHECK: ret <8 x i8> [[VCLZ_V_I]] 2109uint8x8_t test_vclz_u8(uint8x8_t a) { 2110 return vclz_u8(a); 2111} 2112 2113// CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 { 2114// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2115// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2116// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 
false) #4 2117// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> 2118// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> 2119// CHECK: ret <4 x i16> [[TMP1]] 2120uint16x4_t test_vclz_u16(uint16x4_t a) { 2121 return vclz_u16(a); 2122} 2123 2124// CHECK-LABEL: define <2 x i32> @test_vclz_u32(<2 x i32> %a) #0 { 2125// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2126// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2127// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 2128// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> 2129// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> 2130// CHECK: ret <2 x i32> [[TMP1]] 2131uint32x2_t test_vclz_u32(uint32x2_t a) { 2132 return vclz_u32(a); 2133} 2134 2135// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { 2136// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 2137// CHECK: ret <16 x i8> [[VCLZQ_V_I]] 2138int8x16_t test_vclzq_s8(int8x16_t a) { 2139 return vclzq_s8(a); 2140} 2141 2142// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 { 2143// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 2144// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2145// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4 2146// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> 2147// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16> 2148// CHECK: ret <8 x i16> [[TMP1]] 2149int16x8_t test_vclzq_s16(int16x8_t a) { 2150 return vclzq_s16(a); 2151} 2152 2153// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 { 2154// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2155// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2156// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> 
@llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4 2157// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> 2158// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32> 2159// CHECK: ret <4 x i32> [[TMP1]] 2160int32x4_t test_vclzq_s32(int32x4_t a) { 2161 return vclzq_s32(a); 2162} 2163 2164// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 { 2165// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 2166// CHECK: ret <16 x i8> [[VCLZQ_V_I]] 2167uint8x16_t test_vclzq_u8(uint8x16_t a) { 2168 return vclzq_u8(a); 2169} 2170 2171// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 { 2172// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 2173// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2174// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4 2175// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> 2176// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16> 2177// CHECK: ret <8 x i16> [[TMP1]] 2178uint16x8_t test_vclzq_u16(uint16x8_t a) { 2179 return vclzq_u16(a); 2180} 2181 2182// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 { 2183// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2184// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2185// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4 2186// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> 2187// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32> 2188// CHECK: ret <4 x i32> [[TMP1]] 2189uint32x4_t test_vclzq_u32(uint32x4_t a) { 2190 return vclzq_u32(a); 2191} 2192 2193 2194// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 { 2195// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 2196// CHECK: ret <8 x i8> [[VCNT_V_I]] 
2197uint8x8_t test_vcnt_u8(uint8x8_t a) { 2198 return vcnt_u8(a); 2199} 2200 2201// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 { 2202// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 2203// CHECK: ret <8 x i8> [[VCNT_V_I]] 2204int8x8_t test_vcnt_s8(int8x8_t a) { 2205 return vcnt_s8(a); 2206} 2207 2208// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 { 2209// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 2210// CHECK: ret <8 x i8> [[VCNT_V_I]] 2211poly8x8_t test_vcnt_p8(poly8x8_t a) { 2212 return vcnt_p8(a); 2213} 2214 2215// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 { 2216// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 2217// CHECK: ret <16 x i8> [[VCNTQ_V_I]] 2218uint8x16_t test_vcntq_u8(uint8x16_t a) { 2219 return vcntq_u8(a); 2220} 2221 2222// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 { 2223// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 2224// CHECK: ret <16 x i8> [[VCNTQ_V_I]] 2225int8x16_t test_vcntq_s8(int8x16_t a) { 2226 return vcntq_s8(a); 2227} 2228 2229// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 { 2230// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 2231// CHECK: ret <16 x i8> [[VCNTQ_V_I]] 2232poly8x16_t test_vcntq_p8(poly8x16_t a) { 2233 return vcntq_p8(a); 2234} 2235 2236 2237// CHECK-LABEL: define <16 x i8> @test_vcombine_s8(<8 x i8> %a, <8 x i8> %b) #0 { 2238// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2239// CHECK: ret <16 x i8> [[SHUFFLE_I]] 2240int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) { 2241 return vcombine_s8(a, b); 2242} 2243 2244// CHECK-LABEL: define <8 x i16> @test_vcombine_s16(<4 x i16> %a, <4 x i16> %b) #0 { 2245// CHECK: [[SHUFFLE_I:%.*]] = 
shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2246// CHECK: ret <8 x i16> [[SHUFFLE_I]] 2247int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) { 2248 return vcombine_s16(a, b); 2249} 2250 2251// CHECK-LABEL: define <4 x i32> @test_vcombine_s32(<2 x i32> %a, <2 x i32> %b) #0 { 2252// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2253// CHECK: ret <4 x i32> [[SHUFFLE_I]] 2254int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) { 2255 return vcombine_s32(a, b); 2256} 2257 2258// CHECK-LABEL: define <2 x i64> @test_vcombine_s64(<1 x i64> %a, <1 x i64> %b) #0 { 2259// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1> 2260// CHECK: ret <2 x i64> [[SHUFFLE_I]] 2261int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) { 2262 return vcombine_s64(a, b); 2263} 2264 2265// CHECK-LABEL: define <8 x half> @test_vcombine_f16(<4 x half> %a, <4 x half> %b) #0 { 2266// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2267// CHECK: ret <8 x half> [[SHUFFLE_I]] 2268float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) { 2269 return vcombine_f16(a, b); 2270} 2271 2272// CHECK-LABEL: define <4 x float> @test_vcombine_f32(<2 x float> %a, <2 x float> %b) #0 { 2273// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2274// CHECK: ret <4 x float> [[SHUFFLE_I]] 2275float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) { 2276 return vcombine_f32(a, b); 2277} 2278 2279// CHECK-LABEL: define <16 x i8> @test_vcombine_u8(<8 x i8> %a, <8 x i8> %b) #0 { 2280// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2281// CHECK: 
ret <16 x i8> [[SHUFFLE_I]] 2282uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) { 2283 return vcombine_u8(a, b); 2284} 2285 2286// CHECK-LABEL: define <8 x i16> @test_vcombine_u16(<4 x i16> %a, <4 x i16> %b) #0 { 2287// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2288// CHECK: ret <8 x i16> [[SHUFFLE_I]] 2289uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) { 2290 return vcombine_u16(a, b); 2291} 2292 2293// CHECK-LABEL: define <4 x i32> @test_vcombine_u32(<2 x i32> %a, <2 x i32> %b) #0 { 2294// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2295// CHECK: ret <4 x i32> [[SHUFFLE_I]] 2296uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) { 2297 return vcombine_u32(a, b); 2298} 2299 2300// CHECK-LABEL: define <2 x i64> @test_vcombine_u64(<1 x i64> %a, <1 x i64> %b) #0 { 2301// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1> 2302// CHECK: ret <2 x i64> [[SHUFFLE_I]] 2303uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) { 2304 return vcombine_u64(a, b); 2305} 2306 2307// CHECK-LABEL: define <16 x i8> @test_vcombine_p8(<8 x i8> %a, <8 x i8> %b) #0 { 2308// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2309// CHECK: ret <16 x i8> [[SHUFFLE_I]] 2310poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) { 2311 return vcombine_p8(a, b); 2312} 2313 2314// CHECK-LABEL: define <8 x i16> @test_vcombine_p16(<4 x i16> %a, <4 x i16> %b) #0 { 2315// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2316// CHECK: ret <8 x i16> [[SHUFFLE_I]] 2317poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) { 2318 return vcombine_p16(a, 
b); 2319} 2320 2321 2322// CHECK-LABEL: define <8 x i8> @test_vcreate_s8(i64 %a) #0 { 2323// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> 2324// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4 2325// CHECK: ret <8 x i8> [[VCLZ_V_I]] 2326int8x8_t test_vcreate_s8(uint64_t a) { 2327 return vclz_s8(vcreate_s8(a)); 2328} 2329 2330// CHECK-LABEL: define <4 x i16> @test_vcreate_s16(i64 %a) #0 { 2331// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> 2332// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2333// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2334// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 2335// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> 2336// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> 2337// CHECK: ret <4 x i16> [[TMP2]] 2338int16x4_t test_vcreate_s16(uint64_t a) { 2339 return vclz_s16(vcreate_s16(a)); 2340} 2341 2342// CHECK-LABEL: define <2 x i32> @test_vcreate_s32(i64 %a) #0 { 2343// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32> 2344// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> 2345// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2346// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 2347// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> 2348// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> 2349// CHECK: ret <2 x i32> [[TMP2]] 2350int32x2_t test_vcreate_s32(uint64_t a) { 2351 return vclz_s32(vcreate_s32(a)); 2352} 2353 2354// CHECK-LABEL: define <4 x half> @test_vcreate_f16(i64 %a) #0 { 2355// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half> 2356// CHECK: ret <4 x half> [[TMP0]] 2357float16x4_t test_vcreate_f16(uint64_t a) { 2358 return vcreate_f16(a); 2359} 2360 2361// CHECK-LABEL: define <2 x float> @test_vcreate_f32(i64 %a) #0 { 2362// 
CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float> 2363// CHECK: ret <2 x float> [[TMP0]] 2364float32x2_t test_vcreate_f32(uint64_t a) { 2365 return vcreate_f32(a); 2366} 2367 2368// CHECK-LABEL: define <8 x i8> @test_vcreate_u8(i64 %a) #0 { 2369// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> 2370// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4 2371// CHECK: ret <8 x i8> [[VCLZ_V_I]] 2372uint8x8_t test_vcreate_u8(uint64_t a) { 2373 return vclz_s8(vcreate_u8(a)); 2374} 2375 2376// CHECK-LABEL: define <4 x i16> @test_vcreate_u16(i64 %a) #0 { 2377// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> 2378// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2379// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2380// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 2381// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> 2382// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> 2383// CHECK: ret <4 x i16> [[TMP2]] 2384uint16x4_t test_vcreate_u16(uint64_t a) { 2385 return vclz_s16(vcreate_u16(a)); 2386} 2387 2388// CHECK-LABEL: define <2 x i32> @test_vcreate_u32(i64 %a) #0 { 2389// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32> 2390// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> 2391// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2392// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 2393// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> 2394// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> 2395// CHECK: ret <2 x i32> [[TMP2]] 2396uint32x2_t test_vcreate_u32(uint64_t a) { 2397 return vclz_s32(vcreate_u32(a)); 2398} 2399 2400 2401// We have two ways of lowering that. Either with one 'vmov d, r, r' or 2402// with two 'vmov d[],r'. LLVM does the latter. 
We may want to be less 2403// strict about the matching pattern if it starts causing problem. 2404// CHECK-LABEL: define <1 x i64> @test_vcreate_u64(i64 %a) #0 { 2405// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64> 2406// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]] 2407// CHECK: ret <1 x i64> [[ADD_I]] 2408uint64x1_t test_vcreate_u64(uint64_t a) { 2409 uint64x1_t tmp = vcreate_u64(a); 2410 return vadd_u64(tmp, tmp); 2411 2412} 2413 2414// CHECK-LABEL: define <8 x i8> @test_vcreate_p8(i64 %a) #0 { 2415// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> 2416// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]]) #4 2417// CHECK: ret <8 x i8> [[VCNT_V_I]] 2418poly8x8_t test_vcreate_p8(uint64_t a) { 2419 return vcnt_p8(vcreate_p8(a)); 2420} 2421 2422// CHECK-LABEL: define <4 x i16> @test_vcreate_p16(i64 %a) #0 { 2423// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> 2424// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2425// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2426// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2427// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) #4 2428// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> 2429// CHECK: ret <4 x i16> [[TMP4]] 2430poly16x4_t test_vcreate_p16(uint64_t a) { 2431 poly16x4_t tmp = vcreate_p16(a); 2432 return vbsl_p16(tmp, tmp, tmp); 2433} 2434 2435// CHECK-LABEL: define <1 x i64> @test_vcreate_s64(i64 %a) #0 { 2436// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64> 2437// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]] 2438// CHECK: ret <1 x i64> [[ADD_I]] 2439int64x1_t test_vcreate_s64(uint64_t a) { 2440 int64x1_t tmp = vcreate_s64(a); 2441 return vadd_s64(tmp, tmp); 2442} 2443 2444 2445// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 { 2446// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 2447// 
CHECK: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2448// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #4 2449// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8> 2450// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half> 2451// CHECK: ret <4 x half> [[TMP1]] 2452float16x4_t test_vcvt_f16_f32(float32x4_t a) { 2453 return vcvt_f16_f32(a); 2454} 2455 2456 2457// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 { 2458// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2459// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2460// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> 2461// CHECK: ret <2 x float> [[VCVT_I]] 2462float32x2_t test_vcvt_f32_s32(int32x2_t a) { 2463 return vcvt_f32_s32(a); 2464} 2465 2466// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 { 2467// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2468// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2469// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float> 2470// CHECK: ret <2 x float> [[VCVT_I]] 2471float32x2_t test_vcvt_f32_u32(uint32x2_t a) { 2472 return vcvt_f32_u32(a); 2473} 2474 2475// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 { 2476// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2477// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2478// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float> 2479// CHECK: ret <4 x float> [[VCVT_I]] 2480float32x4_t test_vcvtq_f32_s32(int32x4_t a) { 2481 return vcvtq_f32_s32(a); 2482} 2483 2484// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 { 2485// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2486// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2487// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 
x float> 2488// CHECK: ret <4 x float> [[VCVT_I]] 2489float32x4_t test_vcvtq_f32_u32(uint32x4_t a) { 2490 return vcvtq_f32_u32(a); 2491} 2492 2493 2494// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 { 2495// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 2496// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2497// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #4 2498// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8> 2499// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float> 2500// CHECK: ret <4 x float> [[TMP1]] 2501float32x4_t test_vcvt_f32_f16(float16x4_t a) { 2502 return vcvt_f32_f16(a); 2503} 2504 2505 2506// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 { 2507// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2508// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2509// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1) 2510// CHECK: ret <2 x float> [[VCVT_N1]] 2511float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { 2512 return vcvt_n_f32_s32(a, 1); 2513} 2514 2515// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 { 2516// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2517// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2518// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1) 2519// CHECK: ret <2 x float> [[VCVT_N1]] 2520float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { 2521 return vcvt_n_f32_u32(a, 1); 2522} 2523 2524// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 { 2525// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2526// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2527// CHECK: [[VCVT_N1:%.*]] = call <4 x float> 
@llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3) 2528// CHECK: ret <4 x float> [[VCVT_N1]] 2529float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { 2530 return vcvtq_n_f32_s32(a, 3); 2531} 2532 2533// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 { 2534// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2535// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2536// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3) 2537// CHECK: ret <4 x float> [[VCVT_N1]] 2538float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { 2539 return vcvtq_n_f32_u32(a, 3); 2540} 2541 2542 2543// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 { 2544// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 2545// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 2546// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1) 2547// CHECK: ret <2 x i32> [[VCVT_N1]] 2548int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { 2549 return vcvt_n_s32_f32(a, 1); 2550} 2551 2552// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 { 2553// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 2554// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2555// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3) 2556// CHECK: ret <4 x i32> [[VCVT_N1]] 2557int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { 2558 return vcvtq_n_s32_f32(a, 3); 2559} 2560 2561 2562// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 { 2563// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 2564// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 2565// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1) 2566// CHECK: ret <2 x i32> [[VCVT_N1]] 
2567uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { 2568 return vcvt_n_u32_f32(a, 1); 2569} 2570 2571// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 { 2572// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 2573// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2574// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3) 2575// CHECK: ret <4 x i32> [[VCVT_N1]] 2576uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { 2577 return vcvtq_n_u32_f32(a, 3); 2578} 2579 2580 2581// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 { 2582// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 2583// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 2584// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32> 2585// CHECK: ret <2 x i32> [[VCVT_I]] 2586int32x2_t test_vcvt_s32_f32(float32x2_t a) { 2587 return vcvt_s32_f32(a); 2588} 2589 2590// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 { 2591// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 2592// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2593// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32> 2594// CHECK: ret <4 x i32> [[VCVT_I]] 2595int32x4_t test_vcvtq_s32_f32(float32x4_t a) { 2596 return vcvtq_s32_f32(a); 2597} 2598 2599 2600// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 { 2601// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 2602// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 2603// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32> 2604// CHECK: ret <2 x i32> [[VCVT_I]] 2605uint32x2_t test_vcvt_u32_f32(float32x2_t a) { 2606 return vcvt_u32_f32(a); 2607} 2608 2609// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 { 2610// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 2611// CHECK: [[TMP1:%.*]] = bitcast 
<16 x i8> [[TMP0]] to <4 x float> 2612// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32> 2613// CHECK: ret <4 x i32> [[VCVT_I]] 2614uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { 2615 return vcvtq_u32_f32(a); 2616} 2617 2618 2619// CHECK-LABEL: define <8 x i8> @test_vdup_lane_u8(<8 x i8> %a) #0 { 2620// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 2621// CHECK: ret <8 x i8> [[SHUFFLE]] 2622uint8x8_t test_vdup_lane_u8(uint8x8_t a) { 2623 return vdup_lane_u8(a, 7); 2624} 2625 2626// CHECK-LABEL: define <4 x i16> @test_vdup_lane_u16(<4 x i16> %a) #0 { 2627// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 2628// CHECK: ret <4 x i16> [[SHUFFLE]] 2629uint16x4_t test_vdup_lane_u16(uint16x4_t a) { 2630 return vdup_lane_u16(a, 3); 2631} 2632 2633// CHECK-LABEL: define <2 x i32> @test_vdup_lane_u32(<2 x i32> %a) #0 { 2634// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1> 2635// CHECK: ret <2 x i32> [[SHUFFLE]] 2636uint32x2_t test_vdup_lane_u32(uint32x2_t a) { 2637 return vdup_lane_u32(a, 1); 2638} 2639 2640// CHECK-LABEL: define <8 x i8> @test_vdup_lane_s8(<8 x i8> %a) #0 { 2641// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 2642// CHECK: ret <8 x i8> [[SHUFFLE]] 2643int8x8_t test_vdup_lane_s8(int8x8_t a) { 2644 return vdup_lane_s8(a, 7); 2645} 2646 2647// CHECK-LABEL: define <4 x i16> @test_vdup_lane_s16(<4 x i16> %a) #0 { 2648// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 2649// CHECK: ret <4 x i16> [[SHUFFLE]] 2650int16x4_t test_vdup_lane_s16(int16x4_t a) { 2651 return vdup_lane_s16(a, 3); 2652} 2653 2654// CHECK-LABEL: define <2 x i32> @test_vdup_lane_s32(<2 x i32> %a) #0 { 2655// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 
x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1> 2656// CHECK: ret <2 x i32> [[SHUFFLE]] 2657int32x2_t test_vdup_lane_s32(int32x2_t a) { 2658 return vdup_lane_s32(a, 1); 2659} 2660 2661// CHECK-LABEL: define <8 x i8> @test_vdup_lane_p8(<8 x i8> %a) #0 { 2662// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 2663// CHECK: ret <8 x i8> [[SHUFFLE]] 2664poly8x8_t test_vdup_lane_p8(poly8x8_t a) { 2665 return vdup_lane_p8(a, 7); 2666} 2667 2668// CHECK-LABEL: define <4 x i16> @test_vdup_lane_p16(<4 x i16> %a) #0 { 2669// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 2670// CHECK: ret <4 x i16> [[SHUFFLE]] 2671poly16x4_t test_vdup_lane_p16(poly16x4_t a) { 2672 return vdup_lane_p16(a, 3); 2673} 2674 2675// CHECK-LABEL: define <2 x float> @test_vdup_lane_f32(<2 x float> %a) #0 { 2676// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 1> 2677// CHECK: ret <2 x float> [[SHUFFLE]] 2678float32x2_t test_vdup_lane_f32(float32x2_t a) { 2679 return vdup_lane_f32(a, 1); 2680} 2681 2682// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_u8(<8 x i8> %a) #0 { 2683// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 2684// CHECK: ret <16 x i8> [[SHUFFLE]] 2685uint8x16_t test_vdupq_lane_u8(uint8x8_t a) { 2686 return vdupq_lane_u8(a, 7); 2687} 2688 2689// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_u16(<4 x i16> %a) #0 { 2690// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 2691// CHECK: ret <8 x i16> [[SHUFFLE]] 2692uint16x8_t test_vdupq_lane_u16(uint16x4_t a) { 2693 return vdupq_lane_u16(a, 3); 2694} 2695 2696// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_u32(<2 x i32> %a) #0 
{ 2697// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 2698// CHECK: ret <4 x i32> [[SHUFFLE]] 2699uint32x4_t test_vdupq_lane_u32(uint32x2_t a) { 2700 return vdupq_lane_u32(a, 1); 2701} 2702 2703// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %a) #0 { 2704// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 2705// CHECK: ret <16 x i8> [[SHUFFLE]] 2706int8x16_t test_vdupq_lane_s8(int8x8_t a) { 2707 return vdupq_lane_s8(a, 7); 2708} 2709 2710// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %a) #0 { 2711// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 2712// CHECK: ret <8 x i16> [[SHUFFLE]] 2713int16x8_t test_vdupq_lane_s16(int16x4_t a) { 2714 return vdupq_lane_s16(a, 3); 2715} 2716 2717// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %a) #0 { 2718// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 2719// CHECK: ret <4 x i32> [[SHUFFLE]] 2720int32x4_t test_vdupq_lane_s32(int32x2_t a) { 2721 return vdupq_lane_s32(a, 1); 2722} 2723 2724// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_p8(<8 x i8> %a) #0 { 2725// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 2726// CHECK: ret <16 x i8> [[SHUFFLE]] 2727poly8x16_t test_vdupq_lane_p8(poly8x8_t a) { 2728 return vdupq_lane_p8(a, 7); 2729} 2730 2731// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_p16(<4 x i16> %a) #0 { 2732// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 2733// CHECK: ret <8 x i16> [[SHUFFLE]] 
2734poly16x8_t test_vdupq_lane_p16(poly16x4_t a) { 2735 return vdupq_lane_p16(a, 3); 2736} 2737 2738// CHECK-LABEL: define <4 x float> @test_vdupq_lane_f32(<2 x float> %a) #0 { 2739// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 2740// CHECK: ret <4 x float> [[SHUFFLE]] 2741float32x4_t test_vdupq_lane_f32(float32x2_t a) { 2742 return vdupq_lane_f32(a, 1); 2743} 2744 2745// CHECK-LABEL: define <1 x i64> @test_vdup_lane_s64(<1 x i64> %a) #0 { 2746// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer 2747// CHECK: ret <1 x i64> [[SHUFFLE]] 2748int64x1_t test_vdup_lane_s64(int64x1_t a) { 2749 return vdup_lane_s64(a, 0); 2750} 2751 2752// CHECK-LABEL: define <1 x i64> @test_vdup_lane_u64(<1 x i64> %a) #0 { 2753// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer 2754// CHECK: ret <1 x i64> [[SHUFFLE]] 2755uint64x1_t test_vdup_lane_u64(uint64x1_t a) { 2756 return vdup_lane_u64(a, 0); 2757} 2758 2759// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %a) #0 { 2760// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer 2761// CHECK: ret <2 x i64> [[SHUFFLE]] 2762int64x2_t test_vdupq_lane_s64(int64x1_t a) { 2763 return vdupq_lane_s64(a, 0); 2764} 2765 2766// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_u64(<1 x i64> %a) #0 { 2767// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer 2768// CHECK: ret <2 x i64> [[SHUFFLE]] 2769uint64x2_t test_vdupq_lane_u64(uint64x1_t a) { 2770 return vdupq_lane_u64(a, 0); 2771} 2772 2773 2774// CHECK-LABEL: define <8 x i8> @test_vdup_n_u8(i8 zeroext %a) #0 { 2775// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 2776// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 2777// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, 
i32 2 2778// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3 2779// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4 2780// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5 2781// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6 2782// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7 2783// CHECK: ret <8 x i8> [[VECINIT7_I]] 2784uint8x8_t test_vdup_n_u8(uint8_t a) { 2785 return vdup_n_u8(a); 2786} 2787 2788// CHECK-LABEL: define <4 x i16> @test_vdup_n_u16(i16 zeroext %a) #0 { 2789// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 2790// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 2791// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 2792// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3 2793// CHECK: ret <4 x i16> [[VECINIT3_I]] 2794uint16x4_t test_vdup_n_u16(uint16_t a) { 2795 return vdup_n_u16(a); 2796} 2797 2798// CHECK-LABEL: define <2 x i32> @test_vdup_n_u32(i32 %a) #0 { 2799// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 2800// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 2801// CHECK: ret <2 x i32> [[VECINIT1_I]] 2802uint32x2_t test_vdup_n_u32(uint32_t a) { 2803 return vdup_n_u32(a); 2804} 2805 2806// CHECK-LABEL: define <8 x i8> @test_vdup_n_s8(i8 signext %a) #0 { 2807// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 2808// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 2809// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 2810// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3 2811// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4 2812// CHECK: [[VECINIT5_I:%.*]] = 
insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5 2813// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6 2814// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7 2815// CHECK: ret <8 x i8> [[VECINIT7_I]] 2816int8x8_t test_vdup_n_s8(int8_t a) { 2817 return vdup_n_s8(a); 2818} 2819 2820// CHECK-LABEL: define <4 x i16> @test_vdup_n_s16(i16 signext %a) #0 { 2821// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 2822// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 2823// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 2824// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3 2825// CHECK: ret <4 x i16> [[VECINIT3_I]] 2826int16x4_t test_vdup_n_s16(int16_t a) { 2827 return vdup_n_s16(a); 2828} 2829 2830// CHECK-LABEL: define <2 x i32> @test_vdup_n_s32(i32 %a) #0 { 2831// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 2832// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 2833// CHECK: ret <2 x i32> [[VECINIT1_I]] 2834int32x2_t test_vdup_n_s32(int32_t a) { 2835 return vdup_n_s32(a); 2836} 2837 2838// CHECK-LABEL: define <8 x i8> @test_vdup_n_p8(i8 signext %a) #0 { 2839// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 2840// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 2841// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 2842// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3 2843// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4 2844// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5 2845// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6 2846// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, 
i32 7 2847// CHECK: ret <8 x i8> [[VECINIT7_I]] 2848poly8x8_t test_vdup_n_p8(poly8_t a) { 2849 return vdup_n_p8(a); 2850} 2851 2852// CHECK-LABEL: define <4 x i16> @test_vdup_n_p16(i16 signext %a) #0 { 2853// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 2854// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 2855// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 2856// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3 2857// CHECK: ret <4 x i16> [[VECINIT3_I]] 2858poly16x4_t test_vdup_n_p16(poly16_t a) { 2859 return vdup_n_p16(a); 2860} 2861 2862// CHECK-LABEL: define <4 x half> @test_vdup_n_f16(half* %a) #0 { 2863// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 2864// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0 2865// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1 2866// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2 2867// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3 2868// CHECK: ret <4 x half> [[VECINIT3]] 2869float16x4_t test_vdup_n_f16(float16_t *a) { 2870 return vdup_n_f16(*a); 2871} 2872 2873// CHECK-LABEL: define <2 x float> @test_vdup_n_f32(float %a) #0 { 2874// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0 2875// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1 2876// CHECK: ret <2 x float> [[VECINIT1_I]] 2877float32x2_t test_vdup_n_f32(float32_t a) { 2878 return vdup_n_f32(a); 2879} 2880 2881// CHECK-LABEL: define <16 x i8> @test_vdupq_n_u8(i8 zeroext %a) #0 { 2882// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 2883// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 2884// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 
2885// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3 2886// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4 2887// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5 2888// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6 2889// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7 2890// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8 2891// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9 2892// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10 2893// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11 2894// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12 2895// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13 2896// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14 2897// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15 2898// CHECK: ret <16 x i8> [[VECINIT15_I]] 2899uint8x16_t test_vdupq_n_u8(uint8_t a) { 2900 return vdupq_n_u8(a); 2901} 2902 2903// CHECK-LABEL: define <8 x i16> @test_vdupq_n_u16(i16 zeroext %a) #0 { 2904// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 2905// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 2906// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 2907// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3 2908// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4 2909// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5 2910// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6 2911// CHECK: 
[[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7 2912// CHECK: ret <8 x i16> [[VECINIT7_I]] 2913uint16x8_t test_vdupq_n_u16(uint16_t a) { 2914 return vdupq_n_u16(a); 2915} 2916 2917// CHECK-LABEL: define <4 x i32> @test_vdupq_n_u32(i32 %a) #0 { 2918// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 2919// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 2920// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2 2921// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3 2922// CHECK: ret <4 x i32> [[VECINIT3_I]] 2923uint32x4_t test_vdupq_n_u32(uint32_t a) { 2924 return vdupq_n_u32(a); 2925} 2926 2927// CHECK-LABEL: define <16 x i8> @test_vdupq_n_s8(i8 signext %a) #0 { 2928// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 2929// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 2930// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 2931// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3 2932// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4 2933// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5 2934// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6 2935// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7 2936// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8 2937// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9 2938// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10 2939// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11 2940// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12 2941// CHECK: [[VECINIT13_I:%.*]] = 
insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13 2942// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14 2943// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15 2944// CHECK: ret <16 x i8> [[VECINIT15_I]] 2945int8x16_t test_vdupq_n_s8(int8_t a) { 2946 return vdupq_n_s8(a); 2947} 2948 2949// CHECK-LABEL: define <8 x i16> @test_vdupq_n_s16(i16 signext %a) #0 { 2950// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 2951// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 2952// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 2953// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3 2954// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4 2955// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5 2956// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6 2957// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7 2958// CHECK: ret <8 x i16> [[VECINIT7_I]] 2959int16x8_t test_vdupq_n_s16(int16_t a) { 2960 return vdupq_n_s16(a); 2961} 2962 2963// CHECK-LABEL: define <4 x i32> @test_vdupq_n_s32(i32 %a) #0 { 2964// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 2965// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 2966// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2 2967// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3 2968// CHECK: ret <4 x i32> [[VECINIT3_I]] 2969int32x4_t test_vdupq_n_s32(int32_t a) { 2970 return vdupq_n_s32(a); 2971} 2972 2973// CHECK-LABEL: define <16 x i8> @test_vdupq_n_p8(i8 signext %a) #0 { 2974// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 2975// CHECK: [[VECINIT1_I:%.*]] = 
insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 2976// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 2977// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3 2978// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4 2979// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5 2980// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6 2981// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7 2982// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8 2983// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9 2984// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10 2985// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11 2986// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12 2987// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13 2988// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14 2989// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15 2990// CHECK: ret <16 x i8> [[VECINIT15_I]] 2991poly8x16_t test_vdupq_n_p8(poly8_t a) { 2992 return vdupq_n_p8(a); 2993} 2994 2995// CHECK-LABEL: define <8 x i16> @test_vdupq_n_p16(i16 signext %a) #0 { 2996// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 2997// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 2998// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 2999// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3 3000// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4 3001// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> 
[[VECINIT4_I]], i16 %a, i32 5 3002// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6 3003// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7 3004// CHECK: ret <8 x i16> [[VECINIT7_I]] 3005poly16x8_t test_vdupq_n_p16(poly16_t a) { 3006 return vdupq_n_p16(a); 3007} 3008 3009// CHECK-LABEL: define <8 x half> @test_vdupq_n_f16(half* %a) #0 { 3010// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 3011// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0 3012// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1 3013// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2 3014// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3 3015// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4 3016// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5 3017// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6 3018// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7 3019// CHECK: ret <8 x half> [[VECINIT7]] 3020float16x8_t test_vdupq_n_f16(float16_t *a) { 3021 return vdupq_n_f16(*a); 3022} 3023 3024// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(float %a) #0 { 3025// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0 3026// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1 3027// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2 3028// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3 3029// CHECK: ret <4 x float> [[VECINIT3_I]] 3030float32x4_t test_vdupq_n_f32(float32_t a) { 3031 return vdupq_n_f32(a); 3032} 3033 3034// CHECK-LABEL: define <1 x i64> @test_vdup_n_s64(i64 %a) #0 { 3035// CHECK: [[VECINIT_I:%.*]] = 
insertelement <1 x i64> undef, i64 %a, i32 0 3036// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] 3037// CHECK: ret <1 x i64> [[ADD_I]] 3038int64x1_t test_vdup_n_s64(int64_t a) { 3039 int64x1_t tmp = vdup_n_s64(a); 3040 return vadd_s64(tmp, tmp); 3041} 3042 3043// CHECK-LABEL: define <1 x i64> @test_vdup_n_u64(i64 %a) #0 { 3044// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0 3045// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] 3046// CHECK: ret <1 x i64> [[ADD_I]] 3047uint64x1_t test_vdup_n_u64(uint64_t a) { 3048 int64x1_t tmp = vdup_n_u64(a); 3049 return vadd_s64(tmp, tmp); 3050 3051} 3052 3053// CHECK-LABEL: define <2 x i64> @test_vdupq_n_s64(i64 %a) #0 { 3054// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 3055// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 3056// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]] 3057// CHECK: ret <2 x i64> [[ADD_I]] 3058int64x2_t test_vdupq_n_s64(int64_t a) { 3059 int64x2_t tmp = vdupq_n_s64(a); 3060 return vaddq_s64(tmp, tmp); 3061} 3062 3063// CHECK-LABEL: define <2 x i64> @test_vdupq_n_u64(i64 %a) #0 { 3064// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 3065// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 3066// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]] 3067// CHECK: ret <2 x i64> [[ADD_I]] 3068uint64x2_t test_vdupq_n_u64(uint64_t a) { 3069 int64x2_t tmp = vdupq_n_u64(a); 3070 return vaddq_u64(tmp, tmp); 3071} 3072 3073 3074// CHECK-LABEL: define <8 x i8> @test_veor_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3075// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b 3076// CHECK: ret <8 x i8> [[XOR_I]] 3077int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) { 3078 return veor_s8(a, b); 3079} 3080 3081// CHECK-LABEL: define <4 x i16> @test_veor_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3082// CHECK: [[XOR_I:%.*]] = xor <4 x 
i16> %a, %b 3083// CHECK: ret <4 x i16> [[XOR_I]] 3084int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) { 3085 return veor_s16(a, b); 3086} 3087 3088// CHECK-LABEL: define <2 x i32> @test_veor_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3089// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b 3090// CHECK: ret <2 x i32> [[XOR_I]] 3091int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) { 3092 return veor_s32(a, b); 3093} 3094 3095// CHECK-LABEL: define <1 x i64> @test_veor_s64(<1 x i64> %a, <1 x i64> %b) #0 { 3096// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b 3097// CHECK: ret <1 x i64> [[XOR_I]] 3098int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) { 3099 return veor_s64(a, b); 3100} 3101 3102// CHECK-LABEL: define <8 x i8> @test_veor_u8(<8 x i8> %a, <8 x i8> %b) #0 { 3103// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b 3104// CHECK: ret <8 x i8> [[XOR_I]] 3105uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) { 3106 return veor_u8(a, b); 3107} 3108 3109// CHECK-LABEL: define <4 x i16> @test_veor_u16(<4 x i16> %a, <4 x i16> %b) #0 { 3110// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b 3111// CHECK: ret <4 x i16> [[XOR_I]] 3112uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) { 3113 return veor_u16(a, b); 3114} 3115 3116// CHECK-LABEL: define <2 x i32> @test_veor_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3117// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b 3118// CHECK: ret <2 x i32> [[XOR_I]] 3119uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) { 3120 return veor_u32(a, b); 3121} 3122 3123// CHECK-LABEL: define <1 x i64> @test_veor_u64(<1 x i64> %a, <1 x i64> %b) #0 { 3124// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b 3125// CHECK: ret <1 x i64> [[XOR_I]] 3126uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) { 3127 return veor_u64(a, b); 3128} 3129 3130// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3131// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b 3132// CHECK: ret <16 x i8> [[XOR_I]] 3133int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) { 3134 
return veorq_s8(a, b); 3135} 3136 3137// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3138// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b 3139// CHECK: ret <8 x i16> [[XOR_I]] 3140int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) { 3141 return veorq_s16(a, b); 3142} 3143 3144// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3145// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b 3146// CHECK: ret <4 x i32> [[XOR_I]] 3147int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) { 3148 return veorq_s32(a, b); 3149} 3150 3151// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 3152// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b 3153// CHECK: ret <2 x i64> [[XOR_I]] 3154int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) { 3155 return veorq_s64(a, b); 3156} 3157 3158// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3159// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b 3160// CHECK: ret <16 x i8> [[XOR_I]] 3161uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) { 3162 return veorq_u8(a, b); 3163} 3164 3165// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3166// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b 3167// CHECK: ret <8 x i16> [[XOR_I]] 3168uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) { 3169 return veorq_u16(a, b); 3170} 3171 3172// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3173// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b 3174// CHECK: ret <4 x i32> [[XOR_I]] 3175uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) { 3176 return veorq_u32(a, b); 3177} 3178 3179// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 3180// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b 3181// CHECK: ret <2 x i64> [[XOR_I]] 3182uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) { 3183 return veorq_u64(a, b); 3184} 3185 3186 3187// CHECK-LABEL: define <8 x i8> 
@test_vext_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3188// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 3189// CHECK: ret <8 x i8> [[VEXT]] 3190int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) { 3191 return vext_s8(a, b, 7); 3192} 3193 3194// CHECK-LABEL: define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) #0 { 3195// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 3196// CHECK: ret <8 x i8> [[VEXT]] 3197uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) { 3198 return vext_u8(a, b, 7); 3199} 3200 3201// CHECK-LABEL: define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) #0 { 3202// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 3203// CHECK: ret <8 x i8> [[VEXT]] 3204poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) { 3205 return vext_p8(a, b, 7); 3206} 3207 3208// CHECK-LABEL: define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3209// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3210// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3211// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3212// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3213// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 3214// CHECK: ret <4 x i16> [[VEXT]] 3215int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) { 3216 return vext_s16(a, b, 3); 3217} 3218 3219// CHECK-LABEL: define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) #0 { 3220// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3221// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3222// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3223// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3224// CHECK: [[VEXT:%.*]] = 
shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 3225// CHECK: ret <4 x i16> [[VEXT]] 3226uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) { 3227 return vext_u16(a, b, 3); 3228} 3229 3230// CHECK-LABEL: define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) #0 { 3231// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3232// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3233// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3234// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3235// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 3236// CHECK: ret <4 x i16> [[VEXT]] 3237poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) { 3238 return vext_p16(a, b, 3); 3239} 3240 3241// CHECK-LABEL: define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3242// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3243// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3244// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3245// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3246// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2> 3247// CHECK: ret <2 x i32> [[VEXT]] 3248int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) { 3249 return vext_s32(a, b, 1); 3250} 3251 3252// CHECK-LABEL: define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3253// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3254// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3255// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3256// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3257// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2> 3258// CHECK: ret <2 x i32> [[VEXT]] 3259uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) { 3260 return vext_u32(a, b, 
1); 3261} 3262 3263// CHECK-LABEL: define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) #0 { 3264// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3265// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3266// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3267// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3268// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 3269// CHECK: ret <1 x i64> [[VEXT]] 3270int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) { 3271 return vext_s64(a, b, 0); 3272} 3273 3274// CHECK-LABEL: define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) #0 { 3275// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3276// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3277// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3278// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3279// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 3280// CHECK: ret <1 x i64> [[VEXT]] 3281uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) { 3282 return vext_u64(a, b, 0); 3283} 3284 3285// CHECK-LABEL: define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) #0 { 3286// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3287// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 3288// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3289// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3290// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2> 3291// CHECK: ret <2 x float> [[VEXT]] 3292float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) { 3293 return vext_f32(a, b, 1); 3294} 3295 3296// CHECK-LABEL: define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3297// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 
17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 3298// CHECK: ret <16 x i8> [[VEXT]] 3299int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) { 3300 return vextq_s8(a, b, 15); 3301} 3302 3303// CHECK-LABEL: define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3304// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 3305// CHECK: ret <16 x i8> [[VEXT]] 3306uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) { 3307 return vextq_u8(a, b, 15); 3308} 3309 3310// CHECK-LABEL: define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) #0 { 3311// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 3312// CHECK: ret <16 x i8> [[VEXT]] 3313poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) { 3314 return vextq_p8(a, b, 15); 3315} 3316 3317// CHECK-LABEL: define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3318// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3319// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3320// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3321// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3322// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 3323// CHECK: ret <8 x i16> [[VEXT]] 3324int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) { 3325 return vextq_s16(a, b, 7); 3326} 3327 3328// CHECK-LABEL: define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3329// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3330// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3331// CHECK: [[TMP2:%.*]] = 
bitcast <16 x i8> [[TMP0]] to <8 x i16> 3332// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3333// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 3334// CHECK: ret <8 x i16> [[VEXT]] 3335uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) { 3336 return vextq_u16(a, b, 7); 3337} 3338 3339// CHECK-LABEL: define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) #0 { 3340// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3341// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3342// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3343// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3344// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 3345// CHECK: ret <8 x i16> [[VEXT]] 3346poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) { 3347 return vextq_p16(a, b, 7); 3348} 3349 3350// CHECK-LABEL: define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3351// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3352// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3353// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3354// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3355// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 3356// CHECK: ret <4 x i32> [[VEXT]] 3357int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) { 3358 return vextq_s32(a, b, 3); 3359} 3360 3361// CHECK-LABEL: define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3362// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3363// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3364// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3365// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to 
<4 x i32> 3366// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 3367// CHECK: ret <4 x i32> [[VEXT]] 3368uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) { 3369 return vextq_u32(a, b, 3); 3370} 3371 3372// CHECK-LABEL: define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 3373// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3374// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3375// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3376// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3377// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2> 3378// CHECK: ret <2 x i64> [[VEXT]] 3379int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) { 3380 return vextq_s64(a, b, 1); 3381} 3382 3383// CHECK-LABEL: define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 3384// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3385// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3386// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3387// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3388// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2> 3389// CHECK: ret <2 x i64> [[VEXT]] 3390uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) { 3391 return vextq_u64(a, b, 1); 3392} 3393 3394// CHECK-LABEL: define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) #0 { 3395// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3396// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 3397// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3398// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3399// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 3400// CHECK: ret <4 x float> [[VEXT]] 
3401float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) { 3402 return vextq_f32(a, b, 3); 3403} 3404 3405 3406// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { 3407// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3408// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 3409// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> 3410// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3411// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3412// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 3413// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 3414// CHECK: ret <2 x float> [[TMP6]] 3415float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) { 3416 return vfma_f32(a, b, c); 3417} 3418 3419// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { 3420// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3421// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 3422// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> 3423// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3424// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3425// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 3426// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 3427// CHECK: ret <4 x float> [[TMP6]] 3428float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { 3429 return vfmaq_f32(a, b, c); 3430} 3431 3432// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { 3433// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b 3434// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3435// CHECK: 
[[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> 3436// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> 3437// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3438// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3439// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 3440// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 3441// CHECK: ret <2 x float> [[TMP6]] 3442float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) { 3443 return vfms_f32(a, b, c); 3444} 3445 3446// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { 3447// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b 3448// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3449// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> 3450// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> 3451// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3452// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3453// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 3454// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 3455// CHECK: ret <4 x float> [[TMP6]] 3456float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { 3457 return vfmsq_f32(a, b, c); 3458} 3459 3460 3461// CHECK-LABEL: define <8 x i8> @test_vget_high_s8(<16 x i8> %a) #0 { 3462// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3463// CHECK: ret <8 x i8> [[SHUFFLE_I]] 3464int8x8_t test_vget_high_s8(int8x16_t a) { 3465 return vget_high_s8(a); 3466} 3467 3468// CHECK-LABEL: define <4 x i16> @test_vget_high_s16(<8 x i16> %a) #0 
{ 3469// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3470// CHECK: ret <4 x i16> [[SHUFFLE_I]] 3471int16x4_t test_vget_high_s16(int16x8_t a) { 3472 return vget_high_s16(a); 3473} 3474 3475// CHECK-LABEL: define <2 x i32> @test_vget_high_s32(<4 x i32> %a) #0 { 3476// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 3477// CHECK: ret <2 x i32> [[SHUFFLE_I]] 3478int32x2_t test_vget_high_s32(int32x4_t a) { 3479 return vget_high_s32(a); 3480} 3481 3482// CHECK-LABEL: define <1 x i64> @test_vget_high_s64(<2 x i64> %a) #0 { 3483// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1> 3484// CHECK: ret <1 x i64> [[SHUFFLE_I]] 3485int64x1_t test_vget_high_s64(int64x2_t a) { 3486 return vget_high_s64(a); 3487} 3488 3489// CHECK-LABEL: define <4 x half> @test_vget_high_f16(<8 x half> %a) #0 { 3490// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3491// CHECK: ret <4 x half> [[SHUFFLE_I]] 3492float16x4_t test_vget_high_f16(float16x8_t a) { 3493 return vget_high_f16(a); 3494} 3495 3496// CHECK-LABEL: define <2 x float> @test_vget_high_f32(<4 x float> %a) #0 { 3497// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3> 3498// CHECK: ret <2 x float> [[SHUFFLE_I]] 3499float32x2_t test_vget_high_f32(float32x4_t a) { 3500 return vget_high_f32(a); 3501} 3502 3503// CHECK-LABEL: define <8 x i8> @test_vget_high_u8(<16 x i8> %a) #0 { 3504// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3505// CHECK: ret <8 x i8> [[SHUFFLE_I]] 3506uint8x8_t test_vget_high_u8(uint8x16_t a) { 3507 return vget_high_u8(a); 3508} 3509 3510// CHECK-LABEL: define <4 x i16> @test_vget_high_u16(<8 x i16> %a) #0 { 3511// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, 
<8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3512// CHECK: ret <4 x i16> [[SHUFFLE_I]] 3513uint16x4_t test_vget_high_u16(uint16x8_t a) { 3514 return vget_high_u16(a); 3515} 3516 3517// CHECK-LABEL: define <2 x i32> @test_vget_high_u32(<4 x i32> %a) #0 { 3518// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 3519// CHECK: ret <2 x i32> [[SHUFFLE_I]] 3520uint32x2_t test_vget_high_u32(uint32x4_t a) { 3521 return vget_high_u32(a); 3522} 3523 3524// CHECK-LABEL: define <1 x i64> @test_vget_high_u64(<2 x i64> %a) #0 { 3525// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1> 3526// CHECK: ret <1 x i64> [[SHUFFLE_I]] 3527uint64x1_t test_vget_high_u64(uint64x2_t a) { 3528 return vget_high_u64(a); 3529} 3530 3531// CHECK-LABEL: define <8 x i8> @test_vget_high_p8(<16 x i8> %a) #0 { 3532// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3533// CHECK: ret <8 x i8> [[SHUFFLE_I]] 3534poly8x8_t test_vget_high_p8(poly8x16_t a) { 3535 return vget_high_p8(a); 3536} 3537 3538// CHECK-LABEL: define <4 x i16> @test_vget_high_p16(<8 x i16> %a) #0 { 3539// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3540// CHECK: ret <4 x i16> [[SHUFFLE_I]] 3541poly16x4_t test_vget_high_p16(poly16x8_t a) { 3542 return vget_high_p16(a); 3543} 3544 3545 3546// CHECK-LABEL: define zeroext i8 @test_vget_lane_u8(<8 x i8> %a) #0 { 3547// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7 3548// CHECK: ret i8 [[VGET_LANE]] 3549uint8_t test_vget_lane_u8(uint8x8_t a) { 3550 return vget_lane_u8(a, 7); 3551} 3552 3553// CHECK-LABEL: define zeroext i16 @test_vget_lane_u16(<4 x i16> %a) #0 { 3554// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3555// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3556// CHECK: [[VGET_LANE:%.*]] = 
extractelement <4 x i16> [[TMP1]], i32 3 3557// CHECK: ret i16 [[VGET_LANE]] 3558uint16_t test_vget_lane_u16(uint16x4_t a) { 3559 return vget_lane_u16(a, 3); 3560} 3561 3562// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 { 3563// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3564// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3565// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 3566// CHECK: ret i32 [[VGET_LANE]] 3567uint32_t test_vget_lane_u32(uint32x2_t a) { 3568 return vget_lane_u32(a, 1); 3569} 3570 3571// CHECK-LABEL: define signext i8 @test_vget_lane_s8(<8 x i8> %a) #0 { 3572// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7 3573// CHECK: ret i8 [[VGET_LANE]] 3574int8_t test_vget_lane_s8(int8x8_t a) { 3575 return vget_lane_s8(a, 7); 3576} 3577 3578// CHECK-LABEL: define signext i16 @test_vget_lane_s16(<4 x i16> %a) #0 { 3579// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3580// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3581// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 3582// CHECK: ret i16 [[VGET_LANE]] 3583int16_t test_vget_lane_s16(int16x4_t a) { 3584 return vget_lane_s16(a, 3); 3585} 3586 3587// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 { 3588// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3589// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3590// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 3591// CHECK: ret i32 [[VGET_LANE]] 3592int32_t test_vget_lane_s32(int32x2_t a) { 3593 return vget_lane_s32(a, 1); 3594} 3595 3596// CHECK-LABEL: define signext i8 @test_vget_lane_p8(<8 x i8> %a) #0 { 3597// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7 3598// CHECK: ret i8 [[VGET_LANE]] 3599poly8_t test_vget_lane_p8(poly8x8_t a) { 3600 return vget_lane_p8(a, 7); 3601} 3602 3603// CHECK-LABEL: define signext i16 @test_vget_lane_p16(<4 x i16> %a) #0 { 3604// 
CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3605// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3606// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 3607// CHECK: ret i16 [[VGET_LANE]] 3608poly16_t test_vget_lane_p16(poly16x4_t a) { 3609 return vget_lane_p16(a, 3); 3610} 3611 3612// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 { 3613// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3614// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3615// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 3616// CHECK: ret float [[VGET_LANE]] 3617float32_t test_vget_lane_f32(float32x2_t a) { 3618 return vget_lane_f32(a, 1); 3619} 3620 3621// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 { 3622// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8 3623// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2 3624// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8 3625// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>* 3626// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 3627// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> 3628// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 3629// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1 3630// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2 3631// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half* 3632// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 3633// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float 3634// CHECK: ret float [[CONV]] 3635float32_t test_vget_lane_f16(float16x4_t a) { 3636 return vget_lane_f16(a, 1); 3637} 3638 3639// CHECK-LABEL: define zeroext i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 { 3640// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15 3641// CHECK: ret i8 [[VGET_LANE]] 3642uint8_t test_vgetq_lane_u8(uint8x16_t a) { 3643 return 
vgetq_lane_u8(a, 15); 3644} 3645 3646// CHECK-LABEL: define zeroext i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 { 3647// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3648// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3649// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 3650// CHECK: ret i16 [[VGET_LANE]] 3651uint16_t test_vgetq_lane_u16(uint16x8_t a) { 3652 return vgetq_lane_u16(a, 7); 3653} 3654 3655// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 { 3656// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3657// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3658// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 3659// CHECK: ret i32 [[VGET_LANE]] 3660uint32_t test_vgetq_lane_u32(uint32x4_t a) { 3661 return vgetq_lane_u32(a, 3); 3662} 3663 3664// CHECK-LABEL: define signext i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 { 3665// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15 3666// CHECK: ret i8 [[VGET_LANE]] 3667int8_t test_vgetq_lane_s8(int8x16_t a) { 3668 return vgetq_lane_s8(a, 15); 3669} 3670 3671// CHECK-LABEL: define signext i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 { 3672// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3673// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3674// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 3675// CHECK: ret i16 [[VGET_LANE]] 3676int16_t test_vgetq_lane_s16(int16x8_t a) { 3677 return vgetq_lane_s16(a, 7); 3678} 3679 3680// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 { 3681// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3682// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3683// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 3684// CHECK: ret i32 [[VGET_LANE]] 3685int32_t test_vgetq_lane_s32(int32x4_t a) { 3686 return vgetq_lane_s32(a, 3); 3687} 3688 3689// CHECK-LABEL: define signext i8 
@test_vgetq_lane_p8(<16 x i8> %a) #0 { 3690// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15 3691// CHECK: ret i8 [[VGET_LANE]] 3692poly8_t test_vgetq_lane_p8(poly8x16_t a) { 3693 return vgetq_lane_p8(a, 15); 3694} 3695 3696// CHECK-LABEL: define signext i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 { 3697// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3698// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3699// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 3700// CHECK: ret i16 [[VGET_LANE]] 3701poly16_t test_vgetq_lane_p16(poly16x8_t a) { 3702 return vgetq_lane_p16(a, 7); 3703} 3704 3705// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 { 3706// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3707// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3708// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 3709// CHECK: ret float [[VGET_LANE]] 3710float32_t test_vgetq_lane_f32(float32x4_t a) { 3711 return vgetq_lane_f32(a, 3); 3712} 3713 3714// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 { 3715// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16 3716// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2 3717// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16 3718// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>* 3719// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16 3720// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8> 3721// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 3722// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 3723// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2 3724// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half* 3725// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 3726// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float 3727// CHECK: ret float 
[[CONV]] 3728float32_t test_vgetq_lane_f16(float16x8_t a) { 3729 return vgetq_lane_f16(a, 3); 3730} 3731 3732// The optimizer is able to remove all moves now. 3733// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 { 3734// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3735// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3736// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 3737// CHECK: ret i64 [[VGET_LANE]] 3738int64_t test_vget_lane_s64(int64x1_t a) { 3739 return vget_lane_s64(a, 0); 3740} 3741 3742// The optimizer is able to remove all moves now. 3743// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 { 3744// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3745// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3746// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 3747// CHECK: ret i64 [[VGET_LANE]] 3748uint64_t test_vget_lane_u64(uint64x1_t a) { 3749 return vget_lane_u64(a, 0); 3750} 3751 3752// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 { 3753// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3754// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3755// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 3756// CHECK: ret i64 [[VGET_LANE]] 3757int64_t test_vgetq_lane_s64(int64x2_t a) { 3758 return vgetq_lane_s64(a, 1); 3759} 3760 3761// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 { 3762// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3763// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3764// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 3765// CHECK: ret i64 [[VGET_LANE]] 3766uint64_t test_vgetq_lane_u64(uint64x2_t a) { 3767 return vgetq_lane_u64(a, 1); 3768} 3769 3770 3771// CHECK-LABEL: define <8 x i8> @test_vget_low_s8(<16 x i8> %a) #0 { 3772// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3773// CHECK: ret <8 x i8> [[SHUFFLE_I]] 3774int8x8_t test_vget_low_s8(int8x16_t a) { 3775 return vget_low_s8(a); 3776} 3777 3778// CHECK-LABEL: define <4 x i16> @test_vget_low_s16(<8 x i16> %a) #0 { 3779// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3780// CHECK: ret <4 x i16> [[SHUFFLE_I]] 3781int16x4_t test_vget_low_s16(int16x8_t a) { 3782 return vget_low_s16(a); 3783} 3784 3785// CHECK-LABEL: define <2 x i32> @test_vget_low_s32(<4 x i32> %a) #0 { 3786// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1> 3787// CHECK: ret <2 x i32> [[SHUFFLE_I]] 3788int32x2_t test_vget_low_s32(int32x4_t a) { 3789 return vget_low_s32(a); 3790} 3791 3792// CHECK-LABEL: define <1 x i64> @test_vget_low_s64(<2 x i64> %a) #0 { 3793// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer 3794// CHECK: ret <1 x i64> [[SHUFFLE_I]] 3795int64x1_t test_vget_low_s64(int64x2_t a) { 3796 return vget_low_s64(a); 3797} 3798 3799// CHECK-LABEL: define <4 x half> @test_vget_low_f16(<8 x half> %a) #0 { 3800// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3801// CHECK: ret <4 x half> [[SHUFFLE_I]] 3802float16x4_t test_vget_low_f16(float16x8_t a) { 3803 return vget_low_f16(a); 3804} 3805 3806// CHECK-LABEL: define <2 x float> @test_vget_low_f32(<4 x float> %a) #0 { 3807// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1> 3808// CHECK: ret <2 x float> [[SHUFFLE_I]] 3809float32x2_t test_vget_low_f32(float32x4_t a) { 3810 return vget_low_f32(a); 3811} 3812 3813// CHECK-LABEL: define <8 x i8> @test_vget_low_u8(<16 x i8> %a) #0 { 3814// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3815// CHECK: ret <8 x i8> 
[[SHUFFLE_I]] 3816uint8x8_t test_vget_low_u8(uint8x16_t a) { 3817 return vget_low_u8(a); 3818} 3819 3820// CHECK-LABEL: define <4 x i16> @test_vget_low_u16(<8 x i16> %a) #0 { 3821// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3822// CHECK: ret <4 x i16> [[SHUFFLE_I]] 3823uint16x4_t test_vget_low_u16(uint16x8_t a) { 3824 return vget_low_u16(a); 3825} 3826 3827// CHECK-LABEL: define <2 x i32> @test_vget_low_u32(<4 x i32> %a) #0 { 3828// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1> 3829// CHECK: ret <2 x i32> [[SHUFFLE_I]] 3830uint32x2_t test_vget_low_u32(uint32x4_t a) { 3831 return vget_low_u32(a); 3832} 3833 3834// CHECK-LABEL: define <1 x i64> @test_vget_low_u64(<2 x i64> %a) #0 { 3835// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer 3836// CHECK: ret <1 x i64> [[SHUFFLE_I]] 3837uint64x1_t test_vget_low_u64(uint64x2_t a) { 3838 return vget_low_u64(a); 3839} 3840 3841// CHECK-LABEL: define <8 x i8> @test_vget_low_p8(<16 x i8> %a) #0 { 3842// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3843// CHECK: ret <8 x i8> [[SHUFFLE_I]] 3844poly8x8_t test_vget_low_p8(poly8x16_t a) { 3845 return vget_low_p8(a); 3846} 3847 3848// CHECK-LABEL: define <4 x i16> @test_vget_low_p16(<8 x i16> %a) #0 { 3849// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3850// CHECK: ret <4 x i16> [[SHUFFLE_I]] 3851poly16x4_t test_vget_low_p16(poly16x8_t a) { 3852 return vget_low_p16(a); 3853} 3854 3855 3856// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3857// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3858// CHECK: ret <8 x i8> [[VHADD_V_I]] 3859int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) { 3860 return 
vhadd_s8(a, b); 3861} 3862 3863// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3864// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3865// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3866// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3867// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3868// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4 3869// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> 3870// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> 3871// CHECK: ret <4 x i16> [[TMP2]] 3872int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) { 3873 return vhadd_s16(a, b); 3874} 3875 3876// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3877// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3878// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3879// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3880// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3881// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4 3882// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> 3883// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> 3884// CHECK: ret <2 x i32> [[TMP2]] 3885int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) { 3886 return vhadd_s32(a, b); 3887} 3888 3889// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { 3890// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3891// CHECK: ret <8 x i8> [[VHADD_V_I]] 3892uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) { 3893 return vhadd_u8(a, b); 3894} 3895 3896// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { 3897// 
CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3898// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3899// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3900// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3901// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4 3902// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> 3903// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> 3904// CHECK: ret <4 x i16> [[TMP2]] 3905uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) { 3906 return vhadd_u16(a, b); 3907} 3908 3909// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3910// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3911// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3912// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3913// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3914// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4 3915// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> 3916// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> 3917// CHECK: ret <2 x i32> [[TMP2]] 3918uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) { 3919 return vhadd_u32(a, b); 3920} 3921 3922// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3923// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3924// CHECK: ret <16 x i8> [[VHADDQ_V_I]] 3925int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) { 3926 return vhaddq_s8(a, b); 3927} 3928 3929// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3930// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3931// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> 
%b to <16 x i8> 3932// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3933// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3934// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4 3935// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> 3936// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> 3937// CHECK: ret <8 x i16> [[TMP2]] 3938int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) { 3939 return vhaddq_s16(a, b); 3940} 3941 3942// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3943// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3944// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3945// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3946// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3947// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4 3948// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> 3949// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> 3950// CHECK: ret <4 x i32> [[TMP2]] 3951int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) { 3952 return vhaddq_s32(a, b); 3953} 3954 3955// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3956// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3957// CHECK: ret <16 x i8> [[VHADDQ_V_I]] 3958uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) { 3959 return vhaddq_u8(a, b); 3960} 3961 3962// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3963// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3964// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3965// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> 
[[TMP0]] to <8 x i16> 3966// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3967// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4 3968// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> 3969// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> 3970// CHECK: ret <8 x i16> [[TMP2]] 3971uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) { 3972 return vhaddq_u16(a, b); 3973} 3974 3975// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3976// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3977// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3978// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3979// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3980// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4 3981// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> 3982// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> 3983// CHECK: ret <4 x i32> [[TMP2]] 3984uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) { 3985 return vhaddq_u32(a, b); 3986} 3987 3988 3989// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3990// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3991// CHECK: ret <8 x i8> [[VHSUB_V_I]] 3992int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) { 3993 return vhsub_s8(a, b); 3994} 3995 3996// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3997// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3998// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3999// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4000// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to 
<4 x i16> 4001// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4 4002// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> 4003// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16> 4004// CHECK: ret <4 x i16> [[TMP2]] 4005int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) { 4006 return vhsub_s16(a, b); 4007} 4008 4009// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { 4010// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4011// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4012// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4013// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4014// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4 4015// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> 4016// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32> 4017// CHECK: ret <2 x i32> [[TMP2]] 4018int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) { 4019 return vhsub_s32(a, b); 4020} 4021 4022// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { 4023// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4024// CHECK: ret <8 x i8> [[VHSUB_V_I]] 4025uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) { 4026 return vhsub_u8(a, b); 4027} 4028 4029// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { 4030// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4031// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4032// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4033// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4034// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> 
[[VHSUB_V1_I]]) #4 4035// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> 4036// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16> 4037// CHECK: ret <4 x i16> [[TMP2]] 4038uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) { 4039 return vhsub_u16(a, b); 4040} 4041 4042// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { 4043// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4044// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4045// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4046// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4047// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4 4048// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> 4049// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32> 4050// CHECK: ret <2 x i32> [[TMP2]] 4051uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) { 4052 return vhsub_u32(a, b); 4053} 4054 4055// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 4056// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4057// CHECK: ret <16 x i8> [[VHSUBQ_V_I]] 4058int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) { 4059 return vhsubq_s8(a, b); 4060} 4061 4062// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 4063// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4064// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4065// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4066// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4067// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4 4068// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to 
<16 x i8> 4069// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16> 4070// CHECK: ret <8 x i16> [[TMP2]] 4071int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) { 4072 return vhsubq_s16(a, b); 4073} 4074 4075// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 4076// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4077// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4078// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4079// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4080// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4 4081// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> 4082// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32> 4083// CHECK: ret <4 x i32> [[TMP2]] 4084int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) { 4085 return vhsubq_s32(a, b); 4086} 4087 4088// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 4089// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4090// CHECK: ret <16 x i8> [[VHSUBQ_V_I]] 4091uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) { 4092 return vhsubq_u8(a, b); 4093} 4094 4095// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 4096// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4097// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4098// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4099// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4100// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4 4101// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> 4102// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] 
to <8 x i16> 4103// CHECK: ret <8 x i16> [[TMP2]] 4104uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) { 4105 return vhsubq_u16(a, b); 4106} 4107 4108// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 4109// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4110// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4111// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4112// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4113// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4 4114// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> 4115// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32> 4116// CHECK: ret <4 x i32> [[TMP2]] 4117uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) { 4118 return vhsubq_u32(a, b); 4119} 4120 4121 4122// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 { 4123// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1) 4124// CHECK: ret <16 x i8> [[VLD1]] 4125uint8x16_t test_vld1q_u8(uint8_t const * a) { 4126 return vld1q_u8(a); 4127} 4128 4129// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 { 4130// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4131// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) 4132// CHECK: ret <8 x i16> [[VLD1]] 4133uint16x8_t test_vld1q_u16(uint16_t const * a) { 4134 return vld1q_u16(a); 4135} 4136 4137// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 { 4138// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4139// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4) 4140// CHECK: ret <4 x i32> [[VLD1]] 4141uint32x4_t test_vld1q_u32(uint32_t const * a) { 4142 return vld1q_u32(a); 4143} 4144 4145// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 { 4146// CHECK: 
[[TMP0:%.*]] = bitcast i64* %a to i8* 4147// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4) 4148// CHECK: ret <2 x i64> [[VLD1]] 4149uint64x2_t test_vld1q_u64(uint64_t const * a) { 4150 return vld1q_u64(a); 4151} 4152 4153// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 { 4154// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1) 4155// CHECK: ret <16 x i8> [[VLD1]] 4156int8x16_t test_vld1q_s8(int8_t const * a) { 4157 return vld1q_s8(a); 4158} 4159 4160// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 { 4161// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4162// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) 4163// CHECK: ret <8 x i16> [[VLD1]] 4164int16x8_t test_vld1q_s16(int16_t const * a) { 4165 return vld1q_s16(a); 4166} 4167 4168// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 { 4169// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4170// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4) 4171// CHECK: ret <4 x i32> [[VLD1]] 4172int32x4_t test_vld1q_s32(int32_t const * a) { 4173 return vld1q_s32(a); 4174} 4175 4176// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 { 4177// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4178// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4) 4179// CHECK: ret <2 x i64> [[VLD1]] 4180int64x2_t test_vld1q_s64(int64_t const * a) { 4181 return vld1q_s64(a); 4182} 4183 4184// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 { 4185// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 4186// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) 4187// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VLD1]] to <8 x half> 4188// CHECK: ret <8 x half> [[TMP1]] 4189float16x8_t test_vld1q_f16(float16_t const * a) { 4190 return vld1q_f16(a); 4191} 4192 4193// CHECK-LABEL: define <4 
x float> @test_vld1q_f32(float* %a) #0 { 4194// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 4195// CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4) 4196// CHECK: ret <4 x float> [[VLD1]] 4197float32x4_t test_vld1q_f32(float32_t const * a) { 4198 return vld1q_f32(a); 4199} 4200 4201// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 { 4202// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1) 4203// CHECK: ret <16 x i8> [[VLD1]] 4204poly8x16_t test_vld1q_p8(poly8_t const * a) { 4205 return vld1q_p8(a); 4206} 4207 4208// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 { 4209// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4210// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) 4211// CHECK: ret <8 x i16> [[VLD1]] 4212poly16x8_t test_vld1q_p16(poly16_t const * a) { 4213 return vld1q_p16(a); 4214} 4215 4216// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 { 4217// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1) 4218// CHECK: ret <8 x i8> [[VLD1]] 4219uint8x8_t test_vld1_u8(uint8_t const * a) { 4220 return vld1_u8(a); 4221} 4222 4223// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 { 4224// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4225// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) 4226// CHECK: ret <4 x i16> [[VLD1]] 4227uint16x4_t test_vld1_u16(uint16_t const * a) { 4228 return vld1_u16(a); 4229} 4230 4231// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 { 4232// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4233// CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4) 4234// CHECK: ret <2 x i32> [[VLD1]] 4235uint32x2_t test_vld1_u32(uint32_t const * a) { 4236 return vld1_u32(a); 4237} 4238 4239// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 { 4240// CHECK: [[TMP0:%.*]] = bitcast i64* %a 
to i8* 4241// CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4) 4242// CHECK: ret <1 x i64> [[VLD1]] 4243uint64x1_t test_vld1_u64(uint64_t const * a) { 4244 return vld1_u64(a); 4245} 4246 4247// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 { 4248// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1) 4249// CHECK: ret <8 x i8> [[VLD1]] 4250int8x8_t test_vld1_s8(int8_t const * a) { 4251 return vld1_s8(a); 4252} 4253 4254// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 { 4255// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4256// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) 4257// CHECK: ret <4 x i16> [[VLD1]] 4258int16x4_t test_vld1_s16(int16_t const * a) { 4259 return vld1_s16(a); 4260} 4261 4262// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 { 4263// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4264// CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4) 4265// CHECK: ret <2 x i32> [[VLD1]] 4266int32x2_t test_vld1_s32(int32_t const * a) { 4267 return vld1_s32(a); 4268} 4269 4270// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 { 4271// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4272// CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4) 4273// CHECK: ret <1 x i64> [[VLD1]] 4274int64x1_t test_vld1_s64(int64_t const * a) { 4275 return vld1_s64(a); 4276} 4277 4278// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 { 4279// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 4280// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) 4281// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VLD1]] to <4 x half> 4282// CHECK: ret <4 x half> [[TMP1]] 4283float16x4_t test_vld1_f16(float16_t const * a) { 4284 return vld1_f16(a); 4285} 4286 4287// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 { 4288// 
CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 4289// CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4) 4290// CHECK: ret <2 x float> [[VLD1]] 4291float32x2_t test_vld1_f32(float32_t const * a) { 4292 return vld1_f32(a); 4293} 4294 4295// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 { 4296// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1) 4297// CHECK: ret <8 x i8> [[VLD1]] 4298poly8x8_t test_vld1_p8(poly8_t const * a) { 4299 return vld1_p8(a); 4300} 4301 4302// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 { 4303// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4304// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) 4305// CHECK: ret <4 x i16> [[VLD1]] 4306poly16x4_t test_vld1_p16(poly16_t const * a) { 4307 return vld1_p16(a); 4308} 4309 4310 4311// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_u8(i8* %a) #0 { 4312// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 4313// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 4314// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 4315// CHECK: ret <16 x i8> [[LANE]] 4316uint8x16_t test_vld1q_dup_u8(uint8_t const * a) { 4317 return vld1q_dup_u8(a); 4318} 4319 4320// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_u16(i16* %a) #0 { 4321// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4322// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 4323// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4324// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 4325// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 4326// CHECK: ret <8 x i16> [[LANE]] 4327uint16x8_t test_vld1q_dup_u16(uint16_t const * a) { 4328 return vld1q_dup_u16(a); 4329} 4330 4331// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_u32(i32* %a) #0 { 4332// CHECK: [[TMP0:%.*]] = 
bitcast i32* %a to i8* 4333// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 4334// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 4335// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0 4336// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer 4337// CHECK: ret <4 x i32> [[LANE]] 4338uint32x4_t test_vld1q_dup_u32(uint32_t const * a) { 4339 return vld1q_dup_u32(a); 4340} 4341 4342// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_u64(i64* %a) #0 { 4343// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4344// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 4345// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 4346// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 4347// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 4348// CHECK: ret <2 x i64> [[LANE]] 4349uint64x2_t test_vld1q_dup_u64(uint64_t const * a) { 4350 return vld1q_dup_u64(a); 4351} 4352 4353// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_s8(i8* %a) #0 { 4354// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 4355// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 4356// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 4357// CHECK: ret <16 x i8> [[LANE]] 4358int8x16_t test_vld1q_dup_s8(int8_t const * a) { 4359 return vld1q_dup_s8(a); 4360} 4361 4362// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_s16(i16* %a) #0 { 4363// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4364// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 4365// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4366// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 4367// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 4368// CHECK: ret <8 x i16> [[LANE]] 4369int16x8_t test_vld1q_dup_s16(int16_t const * a) { 
4370 return vld1q_dup_s16(a); 4371} 4372 4373// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_s32(i32* %a) #0 { 4374// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4375// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 4376// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 4377// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0 4378// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer 4379// CHECK: ret <4 x i32> [[LANE]] 4380int32x4_t test_vld1q_dup_s32(int32_t const * a) { 4381 return vld1q_dup_s32(a); 4382} 4383 4384// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_s64(i64* %a) #0 { 4385// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4386// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 4387// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 4388// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 4389// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 4390// CHECK: ret <2 x i64> [[LANE]] 4391int64x2_t test_vld1q_dup_s64(int64_t const * a) { 4392 return vld1q_dup_s64(a); 4393} 4394 4395// CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 { 4396// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 4397// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 4398// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4399// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 4400// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 4401// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half> 4402// CHECK: ret <8 x half> [[TMP4]] 4403float16x8_t test_vld1q_dup_f16(float16_t const * a) { 4404 return vld1q_dup_f16(a); 4405} 4406 4407// CHECK-LABEL: define <4 x float> @test_vld1q_dup_f32(float* %a) #0 { 4408// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 4409// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float* 
4410// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4 4411// CHECK: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 4412// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer 4413// CHECK: ret <4 x float> [[LANE]] 4414float32x4_t test_vld1q_dup_f32(float32_t const * a) { 4415 return vld1q_dup_f32(a); 4416} 4417 4418// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_p8(i8* %a) #0 { 4419// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 4420// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 4421// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 4422// CHECK: ret <16 x i8> [[LANE]] 4423poly8x16_t test_vld1q_dup_p8(poly8_t const * a) { 4424 return vld1q_dup_p8(a); 4425} 4426 4427// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_p16(i16* %a) #0 { 4428// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4429// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 4430// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4431// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 4432// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 4433// CHECK: ret <8 x i16> [[LANE]] 4434poly16x8_t test_vld1q_dup_p16(poly16_t const * a) { 4435 return vld1q_dup_p16(a); 4436} 4437 4438// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 { 4439// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 4440// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 4441// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 4442// CHECK: ret <8 x i8> [[LANE]] 4443uint8x8_t test_vld1_dup_u8(uint8_t const * a) { 4444 return vld1_dup_u8(a); 4445} 4446 4447// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 { 4448// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4449// CHECK: [[TMP1:%.*]] = 
bitcast i8* [[TMP0]] to i16* 4450// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4451// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 4452// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 4453// CHECK: ret <4 x i16> [[LANE]] 4454uint16x4_t test_vld1_dup_u16(uint16_t const * a) { 4455 return vld1_dup_u16(a); 4456} 4457 4458// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 { 4459// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4460// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 4461// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 4462// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 4463// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer 4464// CHECK: ret <2 x i32> [[LANE]] 4465uint32x2_t test_vld1_dup_u32(uint32_t const * a) { 4466 return vld1_dup_u32(a); 4467} 4468 4469// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 { 4470// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4471// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 4472// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 4473// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0 4474// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 4475// CHECK: ret <1 x i64> [[LANE]] 4476uint64x1_t test_vld1_dup_u64(uint64_t const * a) { 4477 return vld1_dup_u64(a); 4478} 4479 4480// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 { 4481// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 4482// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 4483// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 4484// CHECK: ret <8 x i8> [[LANE]] 4485int8x8_t test_vld1_dup_s8(int8_t const * a) { 4486 return vld1_dup_s8(a); 4487} 4488 4489// CHECK-LABEL: define <4 
x i16> @test_vld1_dup_s16(i16* %a) #0 { 4490// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4491// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 4492// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4493// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 4494// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 4495// CHECK: ret <4 x i16> [[LANE]] 4496int16x4_t test_vld1_dup_s16(int16_t const * a) { 4497 return vld1_dup_s16(a); 4498} 4499 4500// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 { 4501// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4502// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 4503// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 4504// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 4505// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer 4506// CHECK: ret <2 x i32> [[LANE]] 4507int32x2_t test_vld1_dup_s32(int32_t const * a) { 4508 return vld1_dup_s32(a); 4509} 4510 4511// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 { 4512// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4513// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 4514// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 4515// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0 4516// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 4517// CHECK: ret <1 x i64> [[LANE]] 4518int64x1_t test_vld1_dup_s64(int64_t const * a) { 4519 return vld1_dup_s64(a); 4520} 4521 4522// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 { 4523// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 4524// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 4525// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4526// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 4527// CHECK: 
[[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 4528// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half> 4529// CHECK: ret <4 x half> [[TMP4]] 4530float16x4_t test_vld1_dup_f16(float16_t const * a) { 4531 return vld1_dup_f16(a); 4532} 4533 4534// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 { 4535// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 4536// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float* 4537// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4 4538// CHECK: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0 4539// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer 4540// CHECK: ret <2 x float> [[LANE]] 4541float32x2_t test_vld1_dup_f32(float32_t const * a) { 4542 return vld1_dup_f32(a); 4543} 4544 4545// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 { 4546// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 4547// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 4548// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 4549// CHECK: ret <8 x i8> [[LANE]] 4550poly8x8_t test_vld1_dup_p8(poly8_t const * a) { 4551 return vld1_dup_p8(a); 4552} 4553 4554// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 { 4555// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4556// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 4557// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 4558// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 4559// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 4560// CHECK: ret <4 x i16> [[LANE]] 4561poly16x4_t test_vld1_dup_p16(poly16_t const * a) { 4562 return vld1_dup_p16(a); 4563} 4564 4565 4566// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_u8(i8* %a, <16 x i8> %b) #0 { 4567// CHECK: [[TMP0:%.*]] 
= load i8, i8* %a, align 1 4568// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 4569// CHECK: ret <16 x i8> [[VLD1_LANE]] 4570uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) { 4571 return vld1q_lane_u8(a, b, 15); 4572} 4573 4574// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_u16(i16* %a, <8 x i16> %b) #0 { 4575// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4576// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4577// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4578// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 4579// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2 4580// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 4581// CHECK: ret <8 x i16> [[VLD1_LANE]] 4582uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) { 4583 return vld1q_lane_u16(a, b, 7); 4584} 4585 4586// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_u32(i32* %a, <4 x i32> %b) #0 { 4587// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4588// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4589// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4590// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32* 4591// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 4592// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3 4593// CHECK: ret <4 x i32> [[VLD1_LANE]] 4594uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) { 4595 return vld1q_lane_u32(a, b, 3); 4596} 4597 4598// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_u64(i64* %a, <2 x i64> %b) #0 { 4599// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4600// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4601// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4602// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer 4603// CHECK: [[TMP4:%.*]] = call <1 x i64> 
@llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4) 4604// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1> 4605// CHECK: ret <2 x i64> [[VLD1Q_LANE]] 4606uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) { 4607 return vld1q_lane_u64(a, b, 1); 4608} 4609 4610// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) #0 { 4611// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 4612// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 4613// CHECK: ret <16 x i8> [[VLD1_LANE]] 4614int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) { 4615 return vld1q_lane_s8(a, b, 15); 4616} 4617 4618// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) #0 { 4619// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 4620// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4621// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4622// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 4623// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2 4624// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 4625// CHECK: ret <8 x i16> [[VLD1_LANE]] 4626int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) { 4627 return vld1q_lane_s16(a, b, 7); 4628} 4629 4630// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) #0 { 4631// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 4632// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4633// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4634// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32* 4635// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 4636// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3 4637// CHECK: ret <4 x i32> [[VLD1_LANE]] 4638int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) { 4639 return vld1q_lane_s32(a, b, 3); 4640} 4641 4642// CHECK-LABEL: 
define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) #0 { 4643// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 4644// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4645// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4646// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer 4647// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4) 4648// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1> 4649// CHECK: ret <2 x i64> [[VLD1Q_LANE]] 4650int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) { 4651 return vld1q_lane_s64(a, b, 1); 4652} 4653 4654// CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 { 4655// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 4656// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 4657// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4658// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 4659// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2 4660// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 4661// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half> 4662// CHECK: ret <8 x half> [[TMP5]] 4663float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) { 4664 return vld1q_lane_f16(a, b, 7); 4665} 4666 4667// CHECK-LABEL: define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) #0 { 4668// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 4669// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4670// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4671// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float* 4672// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4 4673// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3 4674// CHECK: ret <4 x float> [[VLD1_LANE]] 
// NOTE(review): the CHECK lines below follow the update_cc_test_checks.py
// capture style — presumably autogenerated; regenerate rather than hand-edit
// if the expected IR changes. TODO confirm against the script header.
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
  return vld1q_lane_f32(a, b, 3);
}

// vld1[q]_lane tests: a single scalar is loaded from the pointer and inserted
// into one lane of the given vector (insertelement); non-byte element types
// round-trip through <8 x i8>/<16 x i8> bitcasts first.
// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
  return vld1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
  return vld1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
  return vld1_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
  return vld1_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// NOTE(review): the i64 load below expects `align 4` — consistent with the
// apcs-gnu ABI selected in the RUN line (4-byte alignment for 64-bit types);
// confirm before changing.
// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
  return vld1_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
  return vld1_lane_s64(a, b, 0);
}

// f16 has no native IR scalar load here: the lane load is done as i16 and the
// result vector is bitcast back to <4 x half>.
// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half>
// CHECK: ret <4 x half> [[TMP5]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
  return vld1_lane_f16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
// CHECK: ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
  return vld1_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
  return vld1_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
  return vld1_lane_p16(a, b, 3);
}


// vld2[q] tests: two-register interleaved loads via @llvm.arm.neon.vld2.*.
// The two-vector aggregate result is returned indirectly: stored into a local
// alloca, then copied into the sret pointer with llvm.memcpy.
// CHECK-LABEL: define void @test_vld2q_u8(%struct.uint8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
// CHECK: ret void
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: define void @test_vld2q_u16(%struct.uint16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: define void @test_vld2q_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: define void @test_vld2q_s8(%struct.int8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
// CHECK: ret void
int8x16x2_t test_vld2q_s8(int8_t const * a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: define void @test_vld2q_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
int16x8x2_t test_vld2q_s16(int16_t const * a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: define void @test_vld2q_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
int32x4x2_t test_vld2q_s32(int32_t const * a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: define void @test_vld2q_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
float16x8x2_t test_vld2q_f16(float16_t const * a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: define void @test_vld2q_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_V]], { <4 x float>, <4 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
float32x4x2_t test_vld2q_f32(float32_t const * a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: define void @test_vld2q_p8(%struct.poly8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
// CHECK: ret void
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: define void @test_vld2q_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
  return vld2q_p16(a);
}

// D-register (64-bit) vld2 variants: same shape as the Q forms, but the
// result struct is 16 bytes with 8-byte alignment.
// CHECK-LABEL: define void @test_vld2_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
  return vld2_u8(a);
}

// CHECK-LABEL: define void @test_vld2_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
  return vld2_u16(a);
}

// CHECK-LABEL: define void @test_vld2_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
  return vld2_u32(a);
}

// CHECK-LABEL: define void @test_vld2_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
  return vld2_u64(a);
}

// CHECK-LABEL: define void @test_vld2_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
// CHECK: ret void
int8x8x2_t test_vld2_s8(int8_t const * a) {
  return vld2_s8(a);
}

// CHECK-LABEL: define void @test_vld2_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int16x4x2_t test_vld2_s16(int16_t const * a) {
  return vld2_s16(a);
}

// CHECK-LABEL: define void @test_vld2_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int32x2x2_t test_vld2_s32(int32_t const * a) {
  return vld2_s32(a);
}

// CHECK-LABEL: define void @test_vld2_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int64x1x2_t test_vld2_s64(int64_t const * a) {
  return vld2_s64(a);
}

// CHECK-LABEL: define void @test_vld2_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
float16x4x2_t test_vld2_f16(float16_t const * a) {
  return vld2_f16(a);
}

// CHECK-LABEL: define void @test_vld2_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[VLD2_V]], { <2 x float>, <2 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
float32x2x2_t test_vld2_f32(float32_t const * a) {
  return vld2_f32(a);
}

// CHECK-LABEL: define void @test_vld2_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
  return vld2_p8(a);
}

// CHECK-LABEL: define void @test_vld2_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
  return vld2_p16(a);
}


// vld2_dup tests: load-and-duplicate. Lowered as a vld2lane load at lane 0
// (undef input vectors) followed by a zero-index splat shufflevector per
// result register; the 64-bit element case uses a plain vld2 instead.
// CHECK-LABEL: define void @test_vld2_dup_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint8x8x2_t test_vld2_dup_u8(uint8_t const * a) {
  return vld2_dup_u8(a);
}

// CHECK-LABEL: define void @test_vld2_dup_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint16x4x2_t test_vld2_dup_u16(uint16_t const * a) {
  return vld2_dup_u16(a);
}

// CHECK-LABEL: define void @test_vld2_dup_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint32x2x2_t test_vld2_dup_u32(uint32_t const * a) {
  return vld2_dup_u32(a);
}

// 64-bit elements: only one lane exists, so the dup degenerates to a plain vld2.
// CHECK-LABEL: define void @test_vld2_dup_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint64x1x2_t test_vld2_dup_u64(uint64_t const * a) {
  return vld2_dup_u64(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
// CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
// CHECK: ret void
int8x8x2_t test_vld2_dup_s8(int8_t const * a) {
  return vld2_dup_s8(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
int16x4x2_t test_vld2_dup_s16(int16_t const * a) {
  return vld2_dup_s16(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
int32x2x2_t test_vld2_dup_s32(int32_t const * a) {
  return vld2_dup_s32(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int64x1x2_t test_vld2_dup_s64(int64_t const * a) {
  return vld2_dup_s64(a);
}

// CHECK-LABEL: define void @test_vld2_dup_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
float16x4x2_t test_vld2_dup_f16(float16_t const * a) {
  return vld2_dup_f16(a);
}

// CHECK-LABEL: define void @test_vld2_dup_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:
[[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 5336// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 5337// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, i32 0, i32 4) 5338// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD_DUP]], 0 5339// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer 5340// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0 5341// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 5342// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer 5343// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1 5344// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* 5345// CHECK: store { <2 x float>, <2 x float> } [[TMP5]], { <2 x float>, <2 x float> }* [[TMP6]] 5346// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* 5347// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 5348// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) 5349// CHECK: ret void 5350float32x2x2_t test_vld2_dup_f32(float32_t const * a) { 5351 return vld2_dup_f32(a); 5352} 5353 5354// CHECK-LABEL: define void @test_vld2_dup_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 { 5355// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 5356// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 5357// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) 5358// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 5359// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 
x i8> [[TMP1]], <8 x i32> zeroinitializer 5360// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 5361// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1 5362// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer 5363// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 5364// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* 5365// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]] 5366// CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* 5367// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 5368// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false) 5369// CHECK: ret void 5370poly8x8x2_t test_vld2_dup_p8(poly8_t const * a) { 5371 return vld2_dup_p8(a); 5372} 5373 5374// CHECK-LABEL: define void @test_vld2_dup_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 { 5375// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 5376// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 5377// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 5378// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) 5379// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 5380// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer 5381// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 5382// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1 5383// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer 5384// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> 
[[LANE1]], 1 5385// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 5386// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]] 5387// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* 5388// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 5389// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) 5390// CHECK: ret void 5391poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) { 5392 return vld2_dup_p16(a); 5393} 5394 5395 5396// CHECK-LABEL: define void @test_vld2q_lane_u16(%struct.uint16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { 5397// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 5398// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 5399// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 5400// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 5401// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 5402// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5403// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* 5404// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* 5405// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5406// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 5407// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 5408// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 5409// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 5410// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5411// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5412// CHECK: [[VAL1:%.*]] = getelementptr inbounds 
%struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 5413// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 5414// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5415// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 5416// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5417// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5418// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) 5419// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* 5420// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] 5421// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* 5422// CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 5423// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5424// CHECK: ret void 5425uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) { 5426 return vld2q_lane_u16(a, b, 7); 5427} 5428 5429// CHECK-LABEL: define void @test_vld2q_lane_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { 5430// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 5431// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 5432// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 5433// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 5434// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 5435// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5436// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 5437// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 5438// 
CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5439// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 5440// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 5441// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 5442// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 5443// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 5444// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 5445// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 5446// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 5447// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 5448// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 5449// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 5450// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 5451// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4) 5452// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }* 5453// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]] 5454// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* 5455// CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 5456// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5457// CHECK: ret void 5458uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) { 5459 return vld2q_lane_u32(a, b, 3); 5460} 5461 5462// CHECK-LABEL: define void @test_vld2q_lane_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a, [4 x 
i64] %b.coerce) #0 { 5463// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 5464// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 5465// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 5466// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 5467// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 5468// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5469// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 5470// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* 5471// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5472// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 5473// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 5474// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 5475// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 5476// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5477// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5478// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 5479// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 5480// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5481// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 5482// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5483// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5484// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) 5485// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x 
i16>, <8 x i16> }* 5486// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] 5487// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* 5488// CHECK: [[TMP13:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 5489// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5490// CHECK: ret void 5491int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) { 5492 return vld2q_lane_s16(a, b, 7); 5493} 5494 5495// CHECK-LABEL: define void @test_vld2q_lane_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { 5496// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 5497// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 5498// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 5499// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 5500// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 5501// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5502// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* 5503// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* 5504// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5505// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 5506// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 5507// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 5508// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 5509// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 5510// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 5511// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 
0 5512// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 5513// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 5514// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 5515// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 5516// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 5517// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4) 5518// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }* 5519// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]] 5520// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* 5521// CHECK: [[TMP13:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 5522// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5523// CHECK: ret void 5524int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) { 5525 return vld2q_lane_s32(a, b, 3); 5526} 5527 5528// CHECK-LABEL: define void @test_vld2q_lane_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 { 5529// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 5530// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 5531// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 5532// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 5533// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]* 5534// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5535// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* 5536// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* 5537// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* 
[[TMP2]], i32 32, i32 16, i1 false) 5538// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 5539// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* 5540// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 5541// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 5542// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 5543// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 5544// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 5545// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1 5546// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 5547// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 5548// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5549// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5550// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) 5551// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* 5552// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] 5553// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8* 5554// CHECK: [[TMP13:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 5555// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5556// CHECK: ret void 5557float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) { 5558 return vld2q_lane_f16(a, b, 7); 5559} 5560 5561// CHECK-LABEL: define void @test_vld2q_lane_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 { 5562// CHECK: 
[[B:%.*]] = alloca %struct.float32x4x2_t, align 16 5563// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 5564// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 5565// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 5566// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]* 5567// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5568// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* 5569// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* 5570// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5571// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* 5572// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* 5573// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 5574// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 5575// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 5576// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 5577// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 5578// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1 5579// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 5580// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 5581// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 5582// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 5583// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], i32 3, i32 4) 5584// CHECK: [[TMP11:%.*]] = 
bitcast i8* [[TMP3]] to { <4 x float>, <4 x float> }* 5585// CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_LANE_V]], { <4 x float>, <4 x float> }* [[TMP11]] 5586// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* 5587// CHECK: [[TMP13:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* 5588// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5589// CHECK: ret void 5590float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) { 5591 return vld2q_lane_f32(a, b, 3); 5592} 5593 5594// CHECK-LABEL: define void @test_vld2q_lane_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { 5595// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 5596// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 5597// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 5598// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 5599// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 5600// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5601// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* 5602// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* 5603// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5604// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 5605// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 5606// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 5607// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 5608// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5609// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5610// CHECK: [[VAL1:%.*]] = getelementptr 
inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 5611// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 5612// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5613// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 5614// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5615// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5616// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) 5617// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* 5618// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] 5619// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* 5620// CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 5621// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5622// CHECK: ret void 5623poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) { 5624 return vld2q_lane_p16(a, b, 7); 5625} 5626 5627// CHECK-LABEL: define void @test_vld2_lane_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 { 5628// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 5629// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 5630// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 5631// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 5632// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 5633// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5634// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* 5635// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* 5636// CHECK: 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5637// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 5638// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 5639// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 5640// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 5641// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 5642// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 5643// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 5644// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 5645// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }* 5646// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]] 5647// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* 5648// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 5649// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) 5650// CHECK: ret void 5651uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) { 5652 return vld2_lane_u8(a, b, 7); 5653} 5654 5655// CHECK-LABEL: define void @test_vld2_lane_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 { 5656// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 5657// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 5658// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 5659// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 5660// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x 
i16>]* [[COERCE_DIVE]] to [2 x i64]* 5661// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5662// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* 5663// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* 5664// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5665// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 5666// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 5667// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 5668// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 5669// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 5670// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 5671// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 5672// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 5673// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 5674// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 5675// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 5676// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 5677// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) 5678// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* 5679// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] 5680// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* 5681// CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 5682// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) 5683// 
CHECK: ret void 5684uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) { 5685 return vld2_lane_u16(a, b, 3); 5686} 5687 5688// CHECK-LABEL: define void @test_vld2_lane_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 { 5689// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 5690// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 5691// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 5692// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 5693// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* 5694// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5695// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* 5696// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* 5697// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5698// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 5699// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 5700// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 5701// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 5702// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 5703// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 5704// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 5705// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1 5706// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 5707// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 5708// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 5709// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> 
[[TMP8]] to <2 x i32> 5710// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4) 5711// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }* 5712// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]] 5713// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* 5714// CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 5715// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) 5716// CHECK: ret void 5717uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) { 5718 return vld2_lane_u32(a, b, 1); 5719} 5720 5721// CHECK-LABEL: define void @test_vld2_lane_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 { 5722// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 5723// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 5724// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 5725// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 5726// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 5727// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5728// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* 5729// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* 5730// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5731// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 5732// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 5733// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 5734// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 5735// 
CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 5736// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 5737// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 5738// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 5739// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }* 5740// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]] 5741// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* 5742// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 5743// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) 5744// CHECK: ret void 5745int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) { 5746 return vld2_lane_s8(a, b, 7); 5747} 5748 5749// CHECK-LABEL: define void @test_vld2_lane_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 { 5750// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 5751// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 5752// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 5753// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 5754// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 5755// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5756// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* 5757// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* 5758// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5759// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 5760// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 
5761// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 5762// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 5763// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 5764// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 5765// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 5766// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 5767// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 5768// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 5769// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 5770// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 5771// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) 5772// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* 5773// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] 5774// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* 5775// CHECK: [[TMP13:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 5776// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) 5777// CHECK: ret void 5778int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) { 5779 return vld2_lane_s16(a, b, 3); 5780} 5781 5782// CHECK-LABEL: define void @test_vld2_lane_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 { 5783// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 5784// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 5785// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 5786// CHECK: [[COERCE_DIVE:%.*]] 
= getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 5787// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* 5788// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5789// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* 5790// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* 5791// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5792// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* 5793// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 5794// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 5795// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 5796// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 5797// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 5798// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 5799// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1 5800// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 5801// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 5802// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 5803// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 5804// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4) 5805// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }* 5806// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]] 5807// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* 5808// CHECK: [[TMP13:%.*]] = bitcast %struct.int32x2x2_t* 
[[__RET]] to i8* 5809// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) 5810// CHECK: ret void 5811int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) { 5812 return vld2_lane_s32(a, b, 1); 5813} 5814 5815// CHECK-LABEL: define void @test_vld2_lane_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a, [2 x i64] %b.coerce) #0 { 5816// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 5817// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 5818// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 5819// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 5820// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]* 5821// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5822// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* 5823// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* 5824// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5825// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 5826// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* 5827// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 5828// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0 5829// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 5830// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 5831// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 5832// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1 5833// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 5834// CHECK: [[TMP8:%.*]] = 
bitcast <4 x half> [[TMP7]] to <8 x i8> 5835// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 5836// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 5837// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) 5838// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* 5839// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] 5840// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8* 5841// CHECK: [[TMP13:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 5842// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) 5843// CHECK: ret void 5844float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) { 5845 return vld2_lane_f16(a, b, 3); 5846} 5847 5848// CHECK-LABEL: define void @test_vld2_lane_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a, [2 x i64] %b.coerce) #0 { 5849// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 5850// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 5851// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 5852// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 5853// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]* 5854// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5855// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* 5856// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* 5857// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5858// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 5859// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* 5860// CHECK: [[VAL:%.*]] = getelementptr inbounds 
%struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 5861// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0 5862// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 5863// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 5864// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 5865// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1 5866// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 5867// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 5868// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 5869// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 5870// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], i32 1, i32 4) 5871// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float> }* 5872// CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE_V]], { <2 x float>, <2 x float> }* [[TMP11]] 5873// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* 5874// CHECK: [[TMP13:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 5875// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) 5876// CHECK: ret void 5877float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) { 5878 return vld2_lane_f32(a, b, 1); 5879} 5880 5881// CHECK-LABEL: define void @test_vld2_lane_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 { 5882// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 5883// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 5884// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 5885// CHECK: 
[[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 5886// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 5887// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5888// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* 5889// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* 5890// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5891// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 5892// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 5893// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 5894// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 5895// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 5896// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 5897// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 5898// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 5899// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }* 5900// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]] 5901// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* 5902// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 5903// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) 5904// CHECK: ret void 5905poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) { 5906 return vld2_lane_p8(a, b, 7); 5907} 5908 5909// CHECK-LABEL: define void @test_vld2_lane_p16(%struct.poly16x4x2_t* noalias 
sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 { 5910// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 5911// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 5912// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 5913// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 5914// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 5915// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 5916// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* 5917// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* 5918// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 5919// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 5920// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 5921// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 5922// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 5923// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 5924// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 5925// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 5926// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 5927// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 5928// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 5929// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 5930// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 5931// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) 5932// CHECK: [[TMP11:%.*]] 
= bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* 5933// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] 5934// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* 5935// CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 5936// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) 5937// CHECK: ret void 5938poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) { 5939 return vld2_lane_p16(a, b, 3); 5940} 5941 5942 5943// CHECK-LABEL: define void @test_vld3q_u8(%struct.uint8x16x3_t* noalias sret %agg.result, i8* %a) #0 { 5944// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 5945// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 5946// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1) 5947// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 5948// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 5949// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* %agg.result to i8* 5950// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 5951// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false) 5952// CHECK: ret void 5953uint8x16x3_t test_vld3q_u8(uint8_t const * a) { 5954 return vld3q_u8(a); 5955} 5956 5957// CHECK-LABEL: define void @test_vld3q_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a) #0 { 5958// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 5959// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 5960// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 5961// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) 5962// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x 
i16>, <8 x i16>, <8 x i16> }* 5963// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] 5964// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8* 5965// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 5966// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) 5967// CHECK: ret void 5968uint16x8x3_t test_vld3q_u16(uint16_t const * a) { 5969 return vld3q_u16(a); 5970} 5971 5972// CHECK-LABEL: define void @test_vld3q_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a) #0 { 5973// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 5974// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 5975// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 5976// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4) 5977// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 5978// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]] 5979// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8* 5980// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 5981// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) 5982// CHECK: ret void 5983uint32x4x3_t test_vld3q_u32(uint32_t const * a) { 5984 return vld3q_u32(a); 5985} 5986 5987// CHECK-LABEL: define void @test_vld3q_s8(%struct.int8x16x3_t* noalias sret %agg.result, i8* %a) #0 { 5988// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 5989// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 5990// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1) 5991// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 5992// CHECK: 
store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 5993// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* %agg.result to i8* 5994// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 5995// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false) 5996// CHECK: ret void 5997int8x16x3_t test_vld3q_s8(int8_t const * a) { 5998 return vld3q_s8(a); 5999} 6000 6001// CHECK-LABEL: define void @test_vld3q_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a) #0 { 6002// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 6003// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 6004// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 6005// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) 6006// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 6007// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] 6008// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8* 6009// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 6010// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) 6011// CHECK: ret void 6012int16x8x3_t test_vld3q_s16(int16_t const * a) { 6013 return vld3q_s16(a); 6014} 6015 6016// CHECK-LABEL: define void @test_vld3q_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a) #0 { 6017// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 6018// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 6019// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 6020// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4) 6021// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 6022// 
CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]] 6023// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8* 6024// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 6025// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) 6026// CHECK: ret void 6027int32x4x3_t test_vld3q_s32(int32_t const * a) { 6028 return vld3q_s32(a); 6029} 6030 6031// CHECK-LABEL: define void @test_vld3q_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a) #0 { 6032// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 6033// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 6034// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 6035// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) 6036// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 6037// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] 6038// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8* 6039// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 6040// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) 6041// CHECK: ret void 6042float16x8x3_t test_vld3q_f16(float16_t const * a) { 6043 return vld3q_f16(a); 6044} 6045 6046// CHECK-LABEL: define void @test_vld3q_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a) #0 { 6047// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 6048// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 6049// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 6050// CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32.p0i8(i8* [[TMP1]], i32 4) 6051// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x 
float>, <4 x float>, <4 x float> }* 6052// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP2]] 6053// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8* 6054// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 6055// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) 6056// CHECK: ret void 6057float32x4x3_t test_vld3q_f32(float32_t const * a) { 6058 return vld3q_f32(a); 6059} 6060 6061// CHECK-LABEL: define void @test_vld3q_p8(%struct.poly8x16x3_t* noalias sret %agg.result, i8* %a) #0 { 6062// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 6063// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 6064// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1) 6065// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 6066// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 6067// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* %agg.result to i8* 6068// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 6069// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false) 6070// CHECK: ret void 6071poly8x16x3_t test_vld3q_p8(poly8_t const * a) { 6072 return vld3q_p8(a); 6073} 6074 6075// CHECK-LABEL: define void @test_vld3q_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a) #0 { 6076// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 6077// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 6078// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 6079// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) 6080// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x 
i16> }* 6081// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] 6082// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8* 6083// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 6084// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) 6085// CHECK: ret void 6086poly16x8x3_t test_vld3q_p16(poly16_t const * a) { 6087 return vld3q_p16(a); 6088} 6089 6090// CHECK-LABEL: define void @test_vld3_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 { 6091// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 6092// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 6093// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1) 6094// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 6095// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 6096// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8* 6097// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 6098// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false) 6099// CHECK: ret void 6100uint8x8x3_t test_vld3_u8(uint8_t const * a) { 6101 return vld3_u8(a); 6102} 6103 6104// CHECK-LABEL: define void @test_vld3_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 { 6105// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 6106// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 6107// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 6108// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2) 6109// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 6110// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } 
[[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
  return vld3_u16(a);
}

// CHECK-LABEL: define void @test_vld3_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
  return vld3_u32(a);
}

// CHECK-LABEL: define void @test_vld3_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
  return vld3_u64(a);
}

// CHECK-LABEL: define void @test_vld3_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
// CHECK: ret void
int8x8x3_t test_vld3_s8(int8_t const * a) {
  return vld3_s8(a);
}

// CHECK-LABEL: define void @test_vld3_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int16x4x3_t test_vld3_s16(int16_t const * a) {
  return vld3_s16(a);
}

// CHECK-LABEL: define void @test_vld3_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int32x2x3_t test_vld3_s32(int32_t const * a) {
  return vld3_s32(a);
}

// CHECK-LABEL: define void @test_vld3_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int64x1x3_t test_vld3_s64(int64_t const * a) {
  return vld3_s64(a);
}

// CHECK-LABEL: define void @test_vld3_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
float16x4x3_t test_vld3_f16(float16_t const * a) {
  return vld3_f16(a);
}

// CHECK-LABEL: define void @test_vld3_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
float32x2x3_t test_vld3_f32(float32_t const * a) {
  return vld3_f32(a);
}

// CHECK-LABEL: define void @test_vld3_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
  return vld3_p8(a);
}

// CHECK-LABEL: define void @test_vld3_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
  return vld3_p16(a);
}


// vld3_dup_*: load one 3-element structure and replicate each element to all
// lanes. Lowered via vld3lane at lane 0 followed by per-member lane splats,
// except the 1-lane (x1) 64-bit variants, which use a plain vld3.
// CHECK-LABEL: define void @test_vld3_dup_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint8x8x3_t test_vld3_dup_u8(uint8_t const * a) {
  return vld3_dup_u8(a);
}

// CHECK-LABEL: define void @test_vld3_dup_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint16x4x3_t test_vld3_dup_u16(uint16_t const * a) {
  return vld3_dup_u16(a);
}

// CHECK-LABEL: define void @test_vld3_dup_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint32x2x3_t test_vld3_dup_u32(uint32_t const * a) {
  return vld3_dup_u32(a);
}

// CHECK-LABEL: define void @test_vld3_dup_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint64x1x3_t test_vld3_dup_u64(uint64_t const * a) {
  return vld3_dup_u64(a);
}

// CHECK-LABEL: define void @test_vld3_dup_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
int8x8x3_t test_vld3_dup_s8(int8_t const * a) {
  return vld3_dup_s8(a);
}

// CHECK-LABEL: define void @test_vld3_dup_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
int16x4x3_t test_vld3_dup_s16(int16_t const * a) {
  return vld3_dup_s16(a);
}

// CHECK-LABEL: define void @test_vld3_dup_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
int32x2x3_t test_vld3_dup_s32(int32_t const * a) {
  return vld3_dup_s32(a);
}

// CHECK-LABEL: define void @test_vld3_dup_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int64x1x3_t test_vld3_dup_s64(int64_t const * a) {
  return vld3_dup_s64(a);
}

// CHECK-LABEL: define void @test_vld3_dup_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
float16x4x3_t test_vld3_dup_f16(float16_t const * a) {
  return vld3_dup_f16(a);
}

// CHECK-LABEL: define void @test_vld3_dup_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[TMP7]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
float32x2x3_t test_vld3_dup_f32(float32_t const * a) {
  return vld3_dup_f32(a);
}

// CHECK-LABEL: define void @test_vld3_dup_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly8x8x3_t test_vld3_dup_p8(poly8_t const * a) {
  return vld3_dup_p8(a);
}

// CHECK-LABEL: define void @test_vld3_dup_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) {
  return vld3_dup_p16(a);
}


// vld3q_lane_*: load one 3-element structure into the given lane of three
// q registers; the struct argument arrives coerced as [6 x i64] under APCS.
// CHECK-LABEL: define void @test_vld3q_lane_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3q_lane_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
  return vld3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3q_lane_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
  return vld3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3q_lane_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca
%struct.int32x4x3_t, align 16 6652// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 6653// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 6654// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 6655// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* 6656// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 6657// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* 6658// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* 6659// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 6660// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 6661// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 6662// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 6663// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 6664// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 6665// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 6666// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 6667// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 6668// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 6669// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 6670// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 6671// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 6672// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 6673// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 6674// CHECK: [[TMP11:%.*]] = bitcast <16 x 
i8> [[TMP6]] to <4 x i32> 6675// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 6676// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 6677// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4) 6678// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 6679// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]] 6680// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8* 6681// CHECK: [[TMP16:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 6682// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) 6683// CHECK: ret void 6684int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) { 6685 return vld3q_lane_s32(a, b, 3); 6686} 6687 6688// CHECK-LABEL: define void @test_vld3q_lane_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a, [6 x i64] %b.coerce) #0 { 6689// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 6690// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 6691// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 6692// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 6693// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]* 6694// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 6695// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* 6696// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* 6697// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 6698// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 6699// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* 
6700// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 6701// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 6702// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 6703// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 6704// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 6705// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1 6706// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 6707// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 6708// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 6709// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2 6710// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 6711// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> 6712// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 6713// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 6714// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 6715// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2) 6716// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 6717// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]] 6718// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8* 6719// CHECK: [[TMP16:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 6720// CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) 6721// CHECK: ret void 6722float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) { 6723 return vld3q_lane_f16(a, b, 7); 6724} 6725 6726// CHECK-LABEL: define void @test_vld3q_lane_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a, [6 x i64] %b.coerce) #0 { 6727// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 6728// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 6729// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 6730// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 6731// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]* 6732// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 6733// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* 6734// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* 6735// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 6736// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 6737// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* 6738// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 6739// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 6740// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 6741// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 6742// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 6743// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1 6744// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 6745// CHECK: [[TMP8:%.*]] = bitcast <4 x 
float> [[TMP7]] to <16 x i8> 6746// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 6747// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2 6748// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 6749// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> 6750// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 6751// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 6752// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> 6753// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], i32 3, i32 4) 6754// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float>, <4 x float> }* 6755// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP14]] 6756// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8* 6757// CHECK: [[TMP16:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 6758// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) 6759// CHECK: ret void 6760float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) { 6761 return vld3q_lane_f32(a, b, 3); 6762} 6763 6764// CHECK-LABEL: define void @test_vld3q_lane_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 { 6765// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 6766// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 6767// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 6768// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 6769// CHECK: [[TMP0:%.*]] = bitcast [3 
x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* 6770// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 6771// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* 6772// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* 6773// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 6774// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 6775// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 6776// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 6777// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 6778// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 6779// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 6780// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 6781// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 6782// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 6783// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 6784// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 6785// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 6786// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 6787// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 6788// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 6789// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 6790// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 6791// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], 
<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2) 6792// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 6793// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]] 6794// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8* 6795// CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 6796// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) 6797// CHECK: ret void 6798poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) { 6799 return vld3q_lane_p16(a, b, 7); 6800} 6801 6802// CHECK-LABEL: define void @test_vld3_lane_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 { 6803// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 6804// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 6805// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 6806// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 6807// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 6808// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 6809// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 6810// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 6811// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 6812// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 6813// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 6814// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 6815// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 6816// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* 
[[__S1]], i32 0, i32 0 6817// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 6818// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 6819// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 6820// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 6821// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 6822// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) 6823// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 6824// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 6825// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8* 6826// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 6827// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) 6828// CHECK: ret void 6829uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) { 6830 return vld3_lane_u8(a, b, 7); 6831} 6832 6833// CHECK-LABEL: define void @test_vld3_lane_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 { 6834// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 6835// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 6836// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 6837// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 6838// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 6839// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 6840// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 6841// CHECK: 
[[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 6842// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 6843// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 6844// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 6845// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 6846// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 6847// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 6848// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 6849// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 6850// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 6851// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 6852// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 6853// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 6854// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 6855// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 6856// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 6857// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6858// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 6859// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 6860// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2) 6861// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 6862// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } 
[[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]] 6863// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8* 6864// CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 6865// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) 6866// CHECK: ret void 6867uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) { 6868 return vld3_lane_u16(a, b, 3); 6869} 6870 6871// CHECK-LABEL: define void @test_vld3_lane_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 { 6872// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 6873// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 6874// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 6875// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 6876// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 6877// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 6878// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 6879// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 6880// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 6881// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 6882// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 6883// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 6884// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 6885// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 6886// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 6887// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 6888// CHECK: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1 6889// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 6890// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 6891// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 6892// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2 6893// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 6894// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 6895// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 6896// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 6897// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 6898// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4) 6899// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 6900// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]] 6901// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8* 6902// CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 6903// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) 6904// CHECK: ret void 6905uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) { 6906 return vld3_lane_u32(a, b, 1); 6907} 6908 6909// CHECK-LABEL: define void @test_vld3_lane_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 { 6910// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 6911// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 6912// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 6913// CHECK: [[COERCE_DIVE:%.*]] = 
getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 6914// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 6915// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 6916// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 6917// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 6918// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 6919// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 6920// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 6921// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 6922// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 6923// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 6924// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 6925// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 6926// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 6927// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 6928// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 6929// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) 6930// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 6931// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 6932// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8* 6933// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] 
to i8* 6934// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) 6935// CHECK: ret void 6936int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) { 6937 return vld3_lane_s8(a, b, 7); 6938} 6939 6940// CHECK-LABEL: define void @test_vld3_lane_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 { 6941// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 6942// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 6943// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 6944// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 6945// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 6946// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 6947// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 6948// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 6949// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 6950// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 6951// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 6952// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 6953// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 6954// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 6955// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 6956// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 6957// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 6958// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 6959// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 6960// CHECK: 
[[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 6961// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 6962// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 6963// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 6964// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6965// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 6966// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 6967// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2) 6968// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 6969// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]] 6970// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8* 6971// CHECK: [[TMP16:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 6972// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) 6973// CHECK: ret void 6974int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) { 6975 return vld3_lane_s16(a, b, 3); 6976} 6977 6978// CHECK-LABEL: define void @test_vld3_lane_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 { 6979// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 6980// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 6981// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 6982// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 6983// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 6984// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 6985// 
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}

// vld3_lane tests (f16/f32/p8/p16 variants follow): each loads one lane of
// three d-register vectors via @llvm.arm.neon.vld3lane and returns the
// 24-byte aggregate through the sret pointer with a memcpy.
// CHECK-LABEL: define void @test_vld3_lane_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3_lane_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld3_lane_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
  return vld3_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3_lane_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
  return vld3_lane_p16(a, b, 3);
}


// vld4q tests: whole-structure loads of four q-register vectors via
// @llvm.arm.neon.vld4 (no lane argument); the 64-byte result struct is
// copied to the sret pointer with llvm.memcpy.
// CHECK-LABEL: define void @test_vld4q_u8(%struct.uint8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: define void @test_vld4q_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  return vld4q_u16(a);
}

// CHECK-LABEL: define void @test_vld4q_u32(%struct.uint32x4x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: define void @test_vld4q_s8(%struct.int8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: define void @test_vld4q_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: define void @test_vld4q_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
int32x4x4_t test_vld4q_s32(int32_t const * a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: define void @test_vld4q_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
float16x8x4_t test_vld4q_f16(float16_t const * a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: define void @test_vld4q_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
float32x4x4_t test_vld4q_f32(float32_t const * a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: define void @test_vld4q_p8(%struct.poly8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: define void @test_vld4q_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
  return vld4q_p16(a);
}

// vld4 (d-register) tests: @llvm.arm.neon.vld4 on 8-byte vectors; the
// 32-byte result struct is copied to the sret pointer with llvm.memcpy.
// CHECK-LABEL: define void @test_vld4_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
  return vld4_u8(a);
}

// CHECK-LABEL: define void @test_vld4_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
  return vld4_u16(a);
}

// CHECK-LABEL: define void @test_vld4_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
  return vld4_u32(a);
}

// CHECK-LABEL: define void @test_vld4_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
  return vld4_u64(a);
}

// CHECK-LABEL: define void @test_vld4_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_s8(int8_t const * a) {
  return vld4_s8(a);
}

// CHECK-LABEL: define void @test_vld4_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x4x4_t test_vld4_s16(int16_t const * a) {
  return vld4_s16(a);
}

// CHECK-LABEL: define void @test_vld4_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x2x4_t test_vld4_s32(int32_t const * a) {
  return vld4_s32(a);
}

// CHECK-LABEL: define void @test_vld4_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int64x1x4_t test_vld4_s64(int64_t const * a) {
  return vld4_s64(a);
}

// CHECK-LABEL: define void @test_vld4_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
float16x4x4_t test_vld4_f16(float16_t const * a) {
  return vld4_f16(a);
}

// CHECK-LABEL: define void @test_vld4_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x2x4_t test_vld4_f32(float32_t const * a) {
  return vld4_f32(a);
}

// CHECK-LABEL: define void @test_vld4_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
  return vld4_p8(a);
}

// CHECK-LABEL: define void @test_vld4_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
  return vld4_p16(a);
}


// vld4_dup tests: load one element set with @llvm.arm.neon.vld4lane on undef
// vectors, then splat lane 0 of each result via shufflevector/insertvalue.
// CHECK-LABEL: define void @test_vld4_dup_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer
// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]]
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP11:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_dup_u8(uint8_t const * a) {
  return vld4_dup_u8(a);
}

// CHECK-LABEL: define void @test_vld4_dup_u16(%struct.uint16x4x4_t*
noalias sret %agg.result, i16* %a) #0 { 7514// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 7515// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 7516// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 7517// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) 7518// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 7519// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer 7520// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 7521// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 7522// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer 7523// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 7524// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 7525// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer 7526// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 7527// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 7528// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer 7529// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 7530// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 7531// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 
x i16> }* [[TMP10]] 7532// CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8* 7533// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 7534// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) 7535// CHECK: ret void 7536uint16x4x4_t test_vld4_dup_u16(uint16_t const * a) { 7537 return vld4_dup_u16(a); 7538} 7539 7540// CHECK-LABEL: define void @test_vld4_dup_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 { 7541// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 7542// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 7543// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 7544// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) 7545// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 7546// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer 7547// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 7548// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1 7549// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer 7550// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 7551// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2 7552// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer 7553// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2 7554// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x 
i32>, <2 x i32> } [[TMP7]], 3 7555// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer 7556// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3 7557// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 7558// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]] 7559// CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8* 7560// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 7561// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) 7562// CHECK: ret void 7563uint32x2x4_t test_vld4_dup_u32(uint32_t const * a) { 7564 return vld4_dup_u32(a); 7565} 7566 7567// CHECK-LABEL: define void @test_vld4_dup_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 { 7568// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 7569// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 7570// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 7571// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4) 7572// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 7573// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] 7574// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8* 7575// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 7576// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) 7577// CHECK: ret void 7578uint64x1x4_t test_vld4_dup_u64(uint64_t const * a) { 7579 return vld4_dup_u64(a); 7580} 7581 7582// CHECK-LABEL: define void 
@test_vld4_dup_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 { 7583// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 7584// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 7585// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) 7586// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 7587// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 7588// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 7589// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 7590// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer 7591// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 7592// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 7593// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer 7594// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 7595// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3 7596// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer 7597// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3 7598// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 7599// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]] 7600// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to 
i8* 7601// CHECK: [[TMP11:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 7602// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false) 7603// CHECK: ret void 7604int8x8x4_t test_vld4_dup_s8(int8_t const * a) { 7605 return vld4_dup_s8(a); 7606} 7607 7608// CHECK-LABEL: define void @test_vld4_dup_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 { 7609// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 7610// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 7611// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 7612// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) 7613// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 7614// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer 7615// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 7616// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 7617// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer 7618// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 7619// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 7620// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer 7621// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 7622// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 7623// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> 
[[TMP8]], <4 x i32> zeroinitializer 7624// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 7625// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 7626// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]] 7627// CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8* 7628// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 7629// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) 7630// CHECK: ret void 7631int16x4x4_t test_vld4_dup_s16(int16_t const * a) { 7632 return vld4_dup_s16(a); 7633} 7634 7635// CHECK-LABEL: define void @test_vld4_dup_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 { 7636// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 7637// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 7638// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 7639// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) 7640// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 7641// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer 7642// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 7643// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1 7644// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer 7645// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 7646// CHECK: [[TMP6:%.*]] = 
extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2 7647// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer 7648// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2 7649// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], 3 7650// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer 7651// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3 7652// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 7653// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]] 7654// CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8* 7655// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 7656// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) 7657// CHECK: ret void 7658int32x2x4_t test_vld4_dup_s32(int32_t const * a) { 7659 return vld4_dup_s32(a); 7660} 7661 7662// CHECK-LABEL: define void @test_vld4_dup_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 { 7663// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 7664// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 7665// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 7666// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4) 7667// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 7668// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] 7669// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result 
to i8* 7670// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 7671// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) 7672// CHECK: ret void 7673int64x1x4_t test_vld4_dup_s64(int64_t const * a) { 7674 return vld4_dup_s64(a); 7675} 7676 7677// CHECK-LABEL: define void @test_vld4_dup_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 { 7678// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 7679// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 7680// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 7681// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) 7682// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 7683// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer 7684// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 7685// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 7686// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer 7687// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 7688// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 7689// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer 7690// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 7691// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 7692// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 
x i16> [[TMP8]], <4 x i32> zeroinitializer 7693// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 7694// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 7695// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]] 7696// CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8* 7697// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 7698// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) 7699// CHECK: ret void 7700float16x4x4_t test_vld4_dup_f16(float16_t const * a) { 7701 return vld4_dup_f16(a); 7702} 7703 7704// CHECK-LABEL: define void @test_vld4_dup_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 { 7705// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 7706// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 7707// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 7708// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) 7709// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0 7710// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer 7711// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0 7712// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1 7713// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer 7714// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 
x float> } [[TMP3]], <2 x float> [[LANE1]], 1 7715// CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2 7716// CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer 7717// CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2 7718// CHECK: [[TMP8:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], 3 7719// CHECK: [[LANE3:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP8]], <2 x i32> zeroinitializer 7720// CHECK: [[TMP9:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], <2 x float> [[LANE3]], 3 7721// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* 7722// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP9]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP10]] 7723// CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8* 7724// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 7725// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) 7726// CHECK: ret void 7727float32x2x4_t test_vld4_dup_f32(float32_t const * a) { 7728 return vld4_dup_f32(a); 7729} 7730 7731// CHECK-LABEL: define void @test_vld4_dup_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 { 7732// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 7733// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 7734// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) 7735// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 7736// CHECK: [[LANE:%.*]] = shufflevector <8 x 
i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 7737// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 7738// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 7739// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer 7740// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 7741// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 7742// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer 7743// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 7744// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3 7745// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer 7746// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3 7747// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 7748// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]] 7749// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8* 7750// CHECK: [[TMP11:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 7751// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false) 7752// CHECK: ret void 7753poly8x8x4_t test_vld4_dup_p8(poly8_t const * a) { 7754 return vld4_dup_p8(a); 7755} 7756 7757// CHECK-LABEL: define void @test_vld4_dup_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 { 7758// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 7759// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 
7760// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 7761// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) 7762// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 7763// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer 7764// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 7765// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 7766// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer 7767// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 7768// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 7769// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer 7770// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 7771// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 7772// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer 7773// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 7774// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 7775// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]] 7776// CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8* 7777// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 7778// 
CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) 7779// CHECK: ret void 7780poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) { 7781 return vld4_dup_p16(a); 7782} 7783 7784 7785// CHECK-LABEL: define void @test_vld4q_lane_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 { 7786// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 7787// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 7788// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 7789// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 7790// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 7791// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 7792// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* 7793// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* 7794// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 7795// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 7796// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 7797// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7798// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 7799// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 7800// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 7801// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7802// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 7803// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 7804// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 7805// CHECK: 
[[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7806// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 7807// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 7808// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 7809// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7810// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 7811// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 7812// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8> 7813// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 7814// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 7815// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 7816// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> 7817// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) 7818// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 7819// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] 7820// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* %agg.result to i8* 7821// CHECK: [[TMP19:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 7822// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) 7823// CHECK: ret void 7824uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) { 7825 return vld4q_lane_u16(a, b, 7); 7826} 7827 7828// CHECK-LABEL: define void 
@test_vld4q_lane_u32(%struct.uint32x4x4_t* noalias sret %agg.result, i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
// Test: vld4q_lane_u32 at lane 3 (expected-IR lines above).
uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
  return vld4q_lane_u32(a, b, 3);
}

// Test: vld4q_lane_s16 at lane 7 (expected-IR lines below).
// CHECK-LABEL: define void @test_vld4q_lane_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x
i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
  return vld4q_lane_s16(a, b, 7);
}

// Test: vld4q_lane_s32 at lane 3 (expected-IR lines below).
// CHECK-LABEL: define void @test_vld4q_lane_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
  return vld4q_lane_s32(a, b, 3);
}

// Test: vld4q_lane_f16 at lane 7 — note the <8 x half> data is loaded via <8 x i16> lanes below.
// CHECK-LABEL: define void @test_vld4q_lane_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:
[[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
  return vld4q_lane_f16(a, b, 7);
}

// Test: vld4q_lane_f32 at lane 3 (expected-IR lines below).
// CHECK-LABEL: define void @test_vld4q_lane_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], <4 x float> [[TMP16]], i32 3, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x
float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
  return vld4q_lane_f32(a, b, 3);
}

// Test: vld4q_lane_p16 at lane 7 (expected-IR lines below).
// CHECK-LABEL: define void @test_vld4q_lane_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false)
// CHECK: ret void
poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
  return vld4q_lane_p16(a, b, 7);
}

// Test: vld4_lane_u8 (64-bit D-register variant) at lane 7 — i8 elements need no bitcasts.
// CHECK-LABEL: define void @test_vld4_lane_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:
[[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
  return vld4_lane_u8(a, b, 7);
}

// Test: vld4_lane_u16 at lane 3 (expected-IR lines below).
// CHECK-LABEL: define void @test_vld4_lane_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}

// Test: vld4_lane_u32 at lane 1 (expected-IR lines below).
// CHECK-LABEL: define void @test_vld4_lane_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to
i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}

// Test: vld4_lane_s8 at lane 7 — same IR as the u8 variant (sign is a C-level distinction only).
// CHECK-LABEL: define void @test_vld4_lane_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}

// Test: vld4_lane_s16 (expected-IR lines below; continues past this view).
// CHECK-LABEL: define void @test_vld4_lane_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x
i16>]* [[VAL5]], i32 0, i32 3 8266// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 8267// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8> 8268// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 8269// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 8270// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 8271// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16> 8272// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2) 8273// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 8274// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]] 8275// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8* 8276// CHECK: [[TMP19:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 8277// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) 8278// CHECK: ret void 8279int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) { 8280 return vld4_lane_s16(a, b, 3); 8281} 8282 8283// CHECK-LABEL: define void @test_vld4_lane_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { 8284// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 8285// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 8286// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 8287// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 8288// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 8289// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 8290// CHECK: [[TMP1:%.*]] = bitcast 
%struct.int32x2x4_t* [[__S1]] to i8* 8291// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 8292// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 8293// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 8294// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 8295// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 8296// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 8297// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 8298// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 8299// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 8300// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1 8301// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 8302// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 8303// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 8304// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2 8305// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 8306// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 8307// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 8308// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3 8309// CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 8310// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8> 8311// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 8312// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 8313// 
CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 8314// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32> 8315// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4) 8316// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 8317// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]] 8318// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8* 8319// CHECK: [[TMP19:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 8320// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) 8321// CHECK: ret void 8322int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) { 8323 return vld4_lane_s32(a, b, 1); 8324} 8325 8326// CHECK-LABEL: define void @test_vld4_lane_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 { 8327// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 8328// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 8329// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 8330// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 8331// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]* 8332// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 8333// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 8334// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 8335// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 8336// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 8337// CHECK: [[TMP4:%.*]] = bitcast 
half* %a to i8* 8338// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 8339// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0 8340// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 8341// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 8342// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 8343// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1 8344// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 8345// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 8346// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 8347// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2 8348// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 8349// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> 8350// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 8351// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3 8352// CHECK: [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 8353// CHECK: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8> 8354// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 8355// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 8356// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 8357// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16> 8358// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], 
<4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2) 8359// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 8360// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]] 8361// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8* 8362// CHECK: [[TMP19:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 8363// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) 8364// CHECK: ret void 8365float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) { 8366 return vld4_lane_f16(a, b, 3); 8367} 8368 8369// CHECK-LABEL: define void @test_vld4_lane_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 { 8370// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 8371// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 8372// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 8373// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 8374// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]* 8375// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 8376// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 8377// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 8378// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 8379// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 8380// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* 8381// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 8382// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0 8383// CHECK: [[TMP5:%.*]] 
= load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 8384// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 8385// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 8386// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1 8387// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 8388// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 8389// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 8390// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2 8391// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 8392// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> 8393// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 8394// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3 8395// CHECK: [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 8396// CHECK: [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8> 8397// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 8398// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 8399// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> 8400// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float> 8401// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x float> [[TMP16]], i32 1, i32 4) 8402// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* 8403// CHECK: store { <2 x float>, <2 x float>, 
<2 x float>, <2 x float> } [[VLD4_LANE_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP17]] 8404// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8* 8405// CHECK: [[TMP19:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 8406// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) 8407// CHECK: ret void 8408float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) { 8409 return vld4_lane_f32(a, b, 1); 8410} 8411 8412// CHECK-LABEL: define void @test_vld4_lane_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 { 8413// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 8414// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 8415// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 8416// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 8417// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 8418// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 8419// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 8420// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 8421// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 8422// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 8423// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 8424// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 8425// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 8426// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 8427// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1 8428// CHECK: 
[[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 8429// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 8430// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2 8431// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 8432// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 8433// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 8434// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 8435// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1) 8436// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 8437// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]] 8438// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8* 8439// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 8440// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false) 8441// CHECK: ret void 8442poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) { 8443 return vld4_lane_p8(a, b, 7); 8444} 8445 8446// CHECK-LABEL: define void @test_vld4_lane_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { 8447// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 8448// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 8449// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 8450// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 8451// CHECK: [[TMP0:%.*]] 
= bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 8452// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 8453// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 8454// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 8455// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 8456// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 8457// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 8458// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 8459// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 8460// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 8461// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 8462// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 8463// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1 8464// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 8465// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 8466// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 8467// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2 8468// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 8469// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 8470// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 8471// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 8472// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 8473// CHECK: [[TMP12:%.*]] = bitcast <4 
x i16> [[TMP11]] to <8 x i8> 8474// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 8475// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 8476// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 8477// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16> 8478// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2) 8479// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 8480// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]] 8481// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8* 8482// CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 8483// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) 8484// CHECK: ret void 8485poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) { 8486 return vld4_lane_p16(a, b, 3); 8487} 8488 8489 8490// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 { 8491// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8492// CHECK: ret <8 x i8> [[VMAX_V_I]] 8493int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { 8494 return vmax_s8(a, b); 8495} 8496 8497// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 { 8498// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8499// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8500// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8501// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8502// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4 8503// 
CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> 8504// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16> 8505// CHECK: ret <4 x i16> [[TMP2]] 8506int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { 8507 return vmax_s16(a, b); 8508} 8509 8510// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 { 8511// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8512// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8513// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8514// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8515// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4 8516// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> 8517// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32> 8518// CHECK: ret <2 x i32> [[TMP2]] 8519int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { 8520 return vmax_s32(a, b); 8521} 8522 8523// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 { 8524// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8525// CHECK: ret <8 x i8> [[VMAX_V_I]] 8526uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { 8527 return vmax_u8(a, b); 8528} 8529 8530// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 { 8531// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8532// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8533// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8534// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8535// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4 8536// CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> 8537// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16> 8538// 
CHECK: ret <4 x i16> [[TMP2]] 8539uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { 8540 return vmax_u16(a, b); 8541} 8542 8543// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 { 8544// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8545// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8546// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8547// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8548// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4 8549// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> 8550// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32> 8551// CHECK: ret <2 x i32> [[TMP2]] 8552uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { 8553 return vmax_u32(a, b); 8554} 8555 8556// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 { 8557// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 8558// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 8559// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 8560// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 8561// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> [[VMAX_V_I]], <2 x float> [[VMAX_V1_I]]) #4 8562// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8> 8563// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x float> 8564// CHECK: ret <2 x float> [[TMP2]] 8565float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { 8566 return vmax_f32(a, b); 8567} 8568 8569// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 8570// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 8571// CHECK: ret <16 x i8> [[VMAXQ_V_I]] 8572int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { 8573 return 
vmaxq_s8(a, b); 8574} 8575 8576// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 8577// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8578// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8579// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8580// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8581// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4 8582// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8> 8583// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16> 8584// CHECK: ret <8 x i16> [[TMP2]] 8585int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { 8586 return vmaxq_s16(a, b); 8587} 8588 8589// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 8590// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8591// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8592// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8593// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8594// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4 8595// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8> 8596// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32> 8597// CHECK: ret <4 x i32> [[TMP2]] 8598int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { 8599 return vmaxq_s32(a, b); 8600} 8601 8602// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 8603// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 8604// CHECK: ret <16 x i8> [[VMAXQ_V_I]] 8605uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { 8606 return vmaxq_u8(a, b); 8607} 8608 8609// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> 
%b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> [[VMAXQ_V_I]], <4 x float> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}


// vmin/vminq: element-wise minimum. The CHECK lines below are FileCheck
// assertions over the emitted IR; NOTE(review): they appear machine-generated,
// so regenerate rather than hand-edit them — TODO confirm against the
// check-update script used for this test.
// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_V_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_V_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> [[VMIN_V_I]], <2 x float> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> [[VMINQ_V_I]], <4 x float> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}


// vmla/vmlaq: multiply-accumulate (a + b*c), lowered to plain mul/add IR.
// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmla_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_s32(a, b, c);
}

// Float variant uses fmul/fadd rather than integer mul/add.
// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmla_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_u32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlaq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlaq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlaq_s32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlaq_f32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlaq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlaq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlaq_u32(a, b, c);
}


// vmlal: widening multiply-accumulate; lowered to @llvm.arm.neon.vmulls/vmullu
// plus a plain add on the widened type.
// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}


// _lane variants: the selected lane is splatted with a shufflevector first.
// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_lane_u32(a, b, c, 1);
}


// _n variants: the scalar operand is broadcast with insertelement chains.
// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlal_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlal_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlal_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_n_u32(a, b, c);
}


// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlaq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlaq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlaq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlaq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlaq_lane_f32(a, b, c, 1);
}


// vmla_n/vmlaq_n: scalar operand broadcast via insertelement chain, then
// multiply-accumulate.
// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmla_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmla_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmla_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmla_n_u32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlaq_n_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlaq_n_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlaq_n_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlaq_n_u32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}


// vmls: multiply-subtract (a - b*c), lowered to mul/sub (fmul/fsub for float).
// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmls_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_s32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmls_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:
[[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] 9357// CHECK: ret <4 x i16> [[SUB_I]] 9358uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { 9359 return vmls_u16(a, b, c); 9360} 9361 9362// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 9363// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c 9364// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] 9365// CHECK: ret <2 x i32> [[SUB_I]] 9366uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { 9367 return vmls_u32(a, b, c); 9368} 9369 9370// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 9371// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c 9372// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]] 9373// CHECK: ret <16 x i8> [[SUB_I]] 9374int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { 9375 return vmlsq_s8(a, b, c); 9376} 9377 9378// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 9379// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c 9380// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] 9381// CHECK: ret <8 x i16> [[SUB_I]] 9382int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { 9383 return vmlsq_s16(a, b, c); 9384} 9385 9386// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 9387// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c 9388// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] 9389// CHECK: ret <4 x i32> [[SUB_I]] 9390int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { 9391 return vmlsq_s32(a, b, c); 9392} 9393 9394// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { 9395// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c 9396// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]] 9397// CHECK: ret <4 x float> [[SUB_I]] 9398float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { 9399 return 
vmlsq_f32(a, b, c); 9400} 9401 9402// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 9403// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c 9404// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]] 9405// CHECK: ret <16 x i8> [[SUB_I]] 9406uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { 9407 return vmlsq_u8(a, b, c); 9408} 9409 9410// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 9411// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c 9412// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] 9413// CHECK: ret <8 x i16> [[SUB_I]] 9414uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { 9415 return vmlsq_u16(a, b, c); 9416} 9417 9418// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 9419// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c 9420// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] 9421// CHECK: ret <4 x i32> [[SUB_I]] 9422uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { 9423 return vmlsq_u32(a, b, c); 9424} 9425 9426 9427// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 9428// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4 9429// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] 9430// CHECK: ret <8 x i16> [[SUB_I]] 9431int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { 9432 return vmlsl_s8(a, b, c); 9433} 9434 9435// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 9436// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 9437// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 9438// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9439// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 9440// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> 
[[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 9441// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] 9442// CHECK: ret <4 x i32> [[SUB_I]] 9443int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 9444 return vmlsl_s16(a, b, c); 9445} 9446 9447// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 9448// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 9449// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 9450// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9451// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 9452// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 9453// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] 9454// CHECK: ret <2 x i64> [[SUB_I]] 9455int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 9456 return vmlsl_s32(a, b, c); 9457} 9458 9459// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 9460// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4 9461// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] 9462// CHECK: ret <8 x i16> [[SUB_I]] 9463uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { 9464 return vmlsl_u8(a, b, c); 9465} 9466 9467// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 9468// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 9469// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 9470// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9471// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 9472// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 9473// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] 9474// CHECK: ret <4 x i32> [[SUB_I]] 
9475uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { 9476 return vmlsl_u16(a, b, c); 9477} 9478 9479// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 9480// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 9481// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 9482// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9483// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 9484// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 9485// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] 9486// CHECK: ret <2 x i64> [[SUB_I]] 9487uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { 9488 return vmlsl_u32(a, b, c); 9489} 9490 9491 9492// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 9493// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 9494// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 9495// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 9496// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9497// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 9498// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 9499// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 9500// CHECK: ret <4 x i32> [[SUB]] 9501int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 9502 return vmlsl_lane_s16(a, b, c, 3); 9503} 9504 9505// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 9506// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 9507// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 9508// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> 
[[SHUFFLE]] to <8 x i8> 9509// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9510// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 9511// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 9512// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 9513// CHECK: ret <2 x i64> [[SUB]] 9514int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 9515 return vmlsl_lane_s32(a, b, c, 1); 9516} 9517 9518// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 9519// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 9520// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 9521// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 9522// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9523// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 9524// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 9525// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 9526// CHECK: ret <4 x i32> [[SUB]] 9527uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { 9528 return vmlsl_lane_u16(a, b, c, 3); 9529} 9530 9531// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 9532// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 9533// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 9534// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 9535// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9536// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 9537// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 9538// CHECK: [[SUB:%.*]] = sub <2 
x i64> %a, [[VMULL2_I]] 9539// CHECK: ret <2 x i64> [[SUB]] 9540uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { 9541 return vmlsl_lane_u32(a, b, c, 1); 9542} 9543 9544 9545// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { 9546// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 9547// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 9548// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 9549// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 9550// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 9551// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 9552// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9553// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 9554// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 9555// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] 9556// CHECK: ret <4 x i32> [[SUB_I]] 9557int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 9558 return vmlsl_n_s16(a, b, c); 9559} 9560 9561// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 9562// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 9563// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 9564// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 9565// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 9566// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9567// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 9568// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 9569// CHECK: [[SUB_I:%.*]] = sub <2 x i64> 
%a, [[VMULL2_I_I]] 9570// CHECK: ret <2 x i64> [[SUB_I]] 9571int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 9572 return vmlsl_n_s32(a, b, c); 9573} 9574 9575// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 { 9576// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 9577// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 9578// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 9579// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 9580// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 9581// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 9582// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9583// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 9584// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 9585// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] 9586// CHECK: ret <4 x i32> [[SUB_I]] 9587uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { 9588 return vmlsl_n_u16(a, b, c); 9589} 9590 9591// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 9592// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 9593// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 9594// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 9595// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 9596// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9597// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 9598// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 9599// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] 
9600// CHECK: ret <2 x i64> [[SUB_I]] 9601uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { 9602 return vmlsl_n_u32(a, b, c); 9603} 9604 9605 9606// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 9607// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 9608// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 9609// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 9610// CHECK: ret <4 x i16> [[SUB]] 9611int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { 9612 return vmls_lane_s16(a, b, c, 3); 9613} 9614 9615// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 9616// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 9617// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 9618// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 9619// CHECK: ret <2 x i32> [[SUB]] 9620int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { 9621 return vmls_lane_s32(a, b, c, 1); 9622} 9623 9624// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 9625// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 9626// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 9627// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 9628// CHECK: ret <4 x i16> [[SUB]] 9629uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { 9630 return vmls_lane_u16(a, b, c, 3); 9631} 9632 9633// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 9634// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 9635// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 9636// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 9637// CHECK: ret <2 x i32> [[SUB]] 9638uint32x2_t 
test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { 9639 return vmls_lane_u32(a, b, c, 1); 9640} 9641 9642// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { 9643// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1> 9644// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 9645// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] 9646// CHECK: ret <2 x float> [[SUB]] 9647float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) { 9648 return vmls_lane_f32(a, b, c, 1); 9649} 9650 9651// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 { 9652// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 9653// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 9654// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 9655// CHECK: ret <8 x i16> [[SUB]] 9656int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { 9657 return vmlsq_lane_s16(a, b, c, 3); 9658} 9659 9660// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 { 9661// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 9662// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 9663// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 9664// CHECK: ret <4 x i32> [[SUB]] 9665int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { 9666 return vmlsq_lane_s32(a, b, c, 1); 9667} 9668 9669// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 { 9670// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 9671// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 9672// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 9673// CHECK: 
ret <8 x i16> [[SUB]] 9674uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) { 9675 return vmlsq_lane_u16(a, b, c, 3); 9676} 9677 9678// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 { 9679// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 9680// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 9681// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 9682// CHECK: ret <4 x i32> [[SUB]] 9683uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) { 9684 return vmlsq_lane_u32(a, b, c, 1); 9685} 9686 9687// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 { 9688// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 9689// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 9690// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] 9691// CHECK: ret <4 x float> [[SUB]] 9692float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) { 9693 return vmlsq_lane_f32(a, b, c, 1); 9694} 9695 9696 9697// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 { 9698// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 9699// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 9700// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 9701// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 9702// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] 9703// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] 9704// CHECK: ret <4 x i16> [[SUB_I]] 9705int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) { 9706 return vmls_n_s16(a, b, c); 9707} 9708 9709// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { 9710// 
CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 9711// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 9712// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] 9713// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] 9714// CHECK: ret <2 x i32> [[SUB_I]] 9715int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) { 9716 return vmls_n_s32(a, b, c); 9717} 9718 9719// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 { 9720// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 9721// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 9722// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 9723// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 9724// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] 9725// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] 9726// CHECK: ret <4 x i16> [[SUB_I]] 9727uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { 9728 return vmls_n_u16(a, b, c); 9729} 9730 9731// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { 9732// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 9733// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 9734// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] 9735// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] 9736// CHECK: ret <2 x i32> [[SUB_I]] 9737uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { 9738 return vmls_n_u32(a, b, c); 9739} 9740 9741// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 { 9742// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0 9743// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1 9744// CHECK: [[MUL_I:%.*]] 
= fmul <2 x float> %b, [[VECINIT1_I]] 9745// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]] 9746// CHECK: ret <2 x float> [[SUB_I]] 9747float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { 9748 return vmls_n_f32(a, b, c); 9749} 9750 9751// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 { 9752// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 9753// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 9754// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 9755// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 9756// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 9757// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 9758// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 9759// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 9760// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] 9761// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] 9762// CHECK: ret <8 x i16> [[SUB_I]] 9763int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { 9764 return vmlsq_n_s16(a, b, c); 9765} 9766 9767// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { 9768// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 9769// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 9770// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 9771// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 9772// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] 9773// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] 9774// CHECK: ret <4 x i32> [[SUB_I]] 9775int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, 
int32_t c) { 9776 return vmlsq_n_s32(a, b, c); 9777} 9778 9779// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 { 9780// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 9781// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 9782// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 9783// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 9784// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 9785// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 9786// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 9787// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 9788// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] 9789// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] 9790// CHECK: ret <8 x i16> [[SUB_I]] 9791uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { 9792 return vmlsq_n_u16(a, b, c); 9793} 9794 9795// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { 9796// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 9797// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 9798// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 9799// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 9800// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] 9801// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] 9802// CHECK: ret <4 x i32> [[SUB_I]] 9803uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { 9804 return vmlsq_n_u32(a, b, c); 9805} 9806 9807// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 { 9808// CHECK: [[VECINIT_I:%.*]] = 
insertelement <4 x float> undef, float %c, i32 0 9809// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 9810// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2 9811// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3 9812// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]] 9813// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]] 9814// CHECK: ret <4 x float> [[SUB_I]] 9815float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { 9816 return vmlsq_n_f32(a, b, c); 9817} 9818 9819 9820// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 { 9821// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16> 9822// CHECK: ret <8 x i16> [[VMOVL_I]] 9823int16x8_t test_vmovl_s8(int8x8_t a) { 9824 return vmovl_s8(a); 9825} 9826 9827// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 { 9828// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 9829// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9830// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 9831// CHECK: ret <4 x i32> [[VMOVL_I]] 9832int32x4_t test_vmovl_s16(int16x4_t a) { 9833 return vmovl_s16(a); 9834} 9835 9836// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 { 9837// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 9838// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9839// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 9840// CHECK: ret <2 x i64> [[VMOVL_I]] 9841int64x2_t test_vmovl_s32(int32x2_t a) { 9842 return vmovl_s32(a); 9843} 9844 9845// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 { 9846// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16> 9847// CHECK: ret <8 x i16> [[VMOVL_I]] 9848uint16x8_t test_vmovl_u8(uint8x8_t a) { 9849 return vmovl_u8(a); 9850} 9851 9852// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 { 9853// CHECK: 
[[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 9854// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 9855// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 9856// CHECK: ret <4 x i32> [[VMOVL_I]] 9857uint32x4_t test_vmovl_u16(uint16x4_t a) { 9858 return vmovl_u16(a); 9859} 9860 9861// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 { 9862// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 9863// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 9864// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 9865// CHECK: ret <2 x i64> [[VMOVL_I]] 9866uint64x2_t test_vmovl_u32(uint32x2_t a) { 9867 return vmovl_u32(a); 9868} 9869 9870 9871// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 { 9872// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 9873// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 9874// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> 9875// CHECK: ret <8 x i8> [[VMOVN_I]] 9876int8x8_t test_vmovn_s16(int16x8_t a) { 9877 return vmovn_s16(a); 9878} 9879 9880// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 { 9881// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 9882// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 9883// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> 9884// CHECK: ret <4 x i16> [[VMOVN_I]] 9885int16x4_t test_vmovn_s32(int32x4_t a) { 9886 return vmovn_s32(a); 9887} 9888 9889// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 { 9890// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 9891// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 9892// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> 9893// CHECK: ret <2 x i32> [[VMOVN_I]] 9894int32x2_t test_vmovn_s64(int64x2_t a) { 9895 return vmovn_s64(a); 9896} 9897 9898// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 { 9899// CHECK: [[TMP0:%.*]] = bitcast <8 
x i16> %a to <16 x i8> 9900// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 9901// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> 9902// CHECK: ret <8 x i8> [[VMOVN_I]] 9903uint8x8_t test_vmovn_u16(uint16x8_t a) { 9904 return vmovn_u16(a); 9905} 9906 9907// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 { 9908// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 9909// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 9910// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> 9911// CHECK: ret <4 x i16> [[VMOVN_I]] 9912uint16x4_t test_vmovn_u32(uint32x4_t a) { 9913 return vmovn_u32(a); 9914} 9915 9916// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 { 9917// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 9918// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 9919// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> 9920// CHECK: ret <2 x i32> [[VMOVN_I]] 9921uint32x2_t test_vmovn_u64(uint64x2_t a) { 9922 return vmovn_u64(a); 9923} 9924 9925 9926// CHECK-LABEL: define <8 x i8> @test_vmov_n_u8(i8 zeroext %a) #0 { 9927// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 9928// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 9929// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 9930// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3 9931// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4 9932// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5 9933// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6 9934// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7 9935// CHECK: ret <8 x i8> [[VECINIT7_I]] 9936uint8x8_t test_vmov_n_u8(uint8_t a) { 9937 return vmov_n_u8(a); 9938} 9939 9940// CHECK-LABEL: define <4 x i16> 
@test_vmov_n_u16(i16 zeroext %a) #0 { 9941// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 9942// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 9943// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 9944// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3 9945// CHECK: ret <4 x i16> [[VECINIT3_I]] 9946uint16x4_t test_vmov_n_u16(uint16_t a) { 9947 return vmov_n_u16(a); 9948} 9949 9950// CHECK-LABEL: define <2 x i32> @test_vmov_n_u32(i32 %a) #0 { 9951// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 9952// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 9953// CHECK: ret <2 x i32> [[VECINIT1_I]] 9954uint32x2_t test_vmov_n_u32(uint32_t a) { 9955 return vmov_n_u32(a); 9956} 9957 9958// CHECK-LABEL: define <8 x i8> @test_vmov_n_s8(i8 signext %a) #0 { 9959// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 9960// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 9961// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 9962// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3 9963// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4 9964// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5 9965// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6 9966// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7 9967// CHECK: ret <8 x i8> [[VECINIT7_I]] 9968int8x8_t test_vmov_n_s8(int8_t a) { 9969 return vmov_n_s8(a); 9970} 9971 9972// CHECK-LABEL: define <4 x i16> @test_vmov_n_s16(i16 signext %a) #0 { 9973// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 9974// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 9975// 
CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 9976// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3 9977// CHECK: ret <4 x i16> [[VECINIT3_I]] 9978int16x4_t test_vmov_n_s16(int16_t a) { 9979 return vmov_n_s16(a); 9980} 9981 9982// CHECK-LABEL: define <2 x i32> @test_vmov_n_s32(i32 %a) #0 { 9983// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 9984// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 9985// CHECK: ret <2 x i32> [[VECINIT1_I]] 9986int32x2_t test_vmov_n_s32(int32_t a) { 9987 return vmov_n_s32(a); 9988} 9989 9990// CHECK-LABEL: define <8 x i8> @test_vmov_n_p8(i8 signext %a) #0 { 9991// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 9992// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 9993// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 9994// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3 9995// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4 9996// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5 9997// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6 9998// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7 9999// CHECK: ret <8 x i8> [[VECINIT7_I]] 10000poly8x8_t test_vmov_n_p8(poly8_t a) { 10001 return vmov_n_p8(a); 10002} 10003 10004// CHECK-LABEL: define <4 x i16> @test_vmov_n_p16(i16 signext %a) #0 { 10005// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 10006// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 10007// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 10008// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3 10009// CHECK: ret <4 x i16> 
[[VECINIT3_I]] 10010poly16x4_t test_vmov_n_p16(poly16_t a) { 10011 return vmov_n_p16(a); 10012} 10013 10014// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a) #0 { 10015// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 10016// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0 10017// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1 10018// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2 10019// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3 10020// CHECK: ret <4 x half> [[VECINIT3]] 10021float16x4_t test_vmov_n_f16(float16_t *a) { 10022 return vmov_n_f16(*a); 10023} 10024 10025// CHECK-LABEL: define <2 x float> @test_vmov_n_f32(float %a) #0 { 10026// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0 10027// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1 10028// CHECK: ret <2 x float> [[VECINIT1_I]] 10029float32x2_t test_vmov_n_f32(float32_t a) { 10030 return vmov_n_f32(a); 10031} 10032 10033// CHECK-LABEL: define <16 x i8> @test_vmovq_n_u8(i8 zeroext %a) #0 { 10034// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 10035// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 10036// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 10037// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3 10038// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4 10039// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5 10040// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6 10041// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7 10042// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8 10043// CHECK: 
[[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9 10044// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10 10045// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11 10046// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12 10047// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13 10048// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14 10049// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15 10050// CHECK: ret <16 x i8> [[VECINIT15_I]] 10051uint8x16_t test_vmovq_n_u8(uint8_t a) { 10052 return vmovq_n_u8(a); 10053} 10054 10055// CHECK-LABEL: define <8 x i16> @test_vmovq_n_u16(i16 zeroext %a) #0 { 10056// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 10057// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 10058// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 10059// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3 10060// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4 10061// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5 10062// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6 10063// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7 10064// CHECK: ret <8 x i16> [[VECINIT7_I]] 10065uint16x8_t test_vmovq_n_u16(uint16_t a) { 10066 return vmovq_n_u16(a); 10067} 10068 10069// CHECK-LABEL: define <4 x i32> @test_vmovq_n_u32(i32 %a) #0 { 10070// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 10071// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 10072// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, 
i32 2 10073// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3 10074// CHECK: ret <4 x i32> [[VECINIT3_I]] 10075uint32x4_t test_vmovq_n_u32(uint32_t a) { 10076 return vmovq_n_u32(a); 10077} 10078 10079// CHECK-LABEL: define <16 x i8> @test_vmovq_n_s8(i8 signext %a) #0 { 10080// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 10081// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 10082// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 10083// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3 10084// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4 10085// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5 10086// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6 10087// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7 10088// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8 10089// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9 10090// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10 10091// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11 10092// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12 10093// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13 10094// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14 10095// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15 10096// CHECK: ret <16 x i8> [[VECINIT15_I]] 10097int8x16_t test_vmovq_n_s8(int8_t a) { 10098 return vmovq_n_s8(a); 10099} 10100 10101// CHECK-LABEL: define <8 x i16> @test_vmovq_n_s16(i16 signext %a) #0 { 10102// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> 
undef, i16 %a, i32 0 10103// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 10104// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 10105// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3 10106// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4 10107// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5 10108// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6 10109// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7 10110// CHECK: ret <8 x i16> [[VECINIT7_I]] 10111int16x8_t test_vmovq_n_s16(int16_t a) { 10112 return vmovq_n_s16(a); 10113} 10114 10115// CHECK-LABEL: define <4 x i32> @test_vmovq_n_s32(i32 %a) #0 { 10116// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 10117// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 10118// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2 10119// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3 10120// CHECK: ret <4 x i32> [[VECINIT3_I]] 10121int32x4_t test_vmovq_n_s32(int32_t a) { 10122 return vmovq_n_s32(a); 10123} 10124 10125// CHECK-LABEL: define <16 x i8> @test_vmovq_n_p8(i8 signext %a) #0 { 10126// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 10127// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 10128// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 10129// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3 10130// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4 10131// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5 10132// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> 
[[VECINIT5_I]], i8 %a, i32 6 10133// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7 10134// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8 10135// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9 10136// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10 10137// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11 10138// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12 10139// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13 10140// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14 10141// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15 10142// CHECK: ret <16 x i8> [[VECINIT15_I]] 10143poly8x16_t test_vmovq_n_p8(poly8_t a) { 10144 return vmovq_n_p8(a); 10145} 10146 10147// CHECK-LABEL: define <8 x i16> @test_vmovq_n_p16(i16 signext %a) #0 { 10148// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 10149// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 10150// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 10151// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3 10152// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4 10153// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5 10154// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6 10155// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7 10156// CHECK: ret <8 x i16> [[VECINIT7_I]] 10157poly16x8_t test_vmovq_n_p16(poly16_t a) { 10158 return vmovq_n_p16(a); 10159} 10160 10161// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(half* %a) #0 { 10162// CHECK: [[TMP0:%.*]] = 
load half, half* %a, align 2 10163// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0 10164// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1 10165// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2 10166// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3 10167// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4 10168// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5 10169// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6 10170// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7 10171// CHECK: ret <8 x half> [[VECINIT7]] 10172float16x8_t test_vmovq_n_f16(float16_t *a) { 10173 return vmovq_n_f16(*a); 10174} 10175 10176// CHECK-LABEL: define <4 x float> @test_vmovq_n_f32(float %a) #0 { 10177// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0 10178// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1 10179// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2 10180// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3 10181// CHECK: ret <4 x float> [[VECINIT3_I]] 10182float32x4_t test_vmovq_n_f32(float32_t a) { 10183 return vmovq_n_f32(a); 10184} 10185 10186// CHECK-LABEL: define <1 x i64> @test_vmov_n_s64(i64 %a) #0 { 10187// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0 10188// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] 10189// CHECK: ret <1 x i64> [[ADD_I]] 10190int64x1_t test_vmov_n_s64(int64_t a) { 10191 int64x1_t tmp = vmov_n_s64(a); 10192 return vadd_s64(tmp, tmp); 10193} 10194 10195// CHECK-LABEL: define <1 x i64> @test_vmov_n_u64(i64 %a) #0 { 10196// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> 
undef, i64 %a, i32 0 10197// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] 10198// CHECK: ret <1 x i64> [[ADD_I]] 10199uint64x1_t test_vmov_n_u64(uint64_t a) { 10200 uint64x1_t tmp = vmov_n_u64(a); 10201 return vadd_u64(tmp, tmp); 10202} 10203 10204// CHECK-LABEL: define <2 x i64> @test_vmovq_n_s64(i64 %a) #0 { 10205// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 10206// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 10207// CHECK: ret <2 x i64> [[VECINIT1_I]] 10208int64x2_t test_vmovq_n_s64(int64_t a) { 10209 return vmovq_n_s64(a); 10210} 10211 10212// CHECK-LABEL: define <2 x i64> @test_vmovq_n_u64(i64 %a) #0 { 10213// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 10214// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 10215// CHECK: ret <2 x i64> [[VECINIT1_I]] 10216uint64x2_t test_vmovq_n_u64(uint64_t a) { 10217 return vmovq_n_u64(a); 10218} 10219 10220 10221// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %a, <8 x i8> %b) #0 { 10222// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b 10223// CHECK: ret <8 x i8> [[MUL_I]] 10224int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) { 10225 return vmul_s8(a, b); 10226} 10227 10228// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %a, <4 x i16> %b) #0 { 10229// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b 10230// CHECK: ret <4 x i16> [[MUL_I]] 10231int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) { 10232 return vmul_s16(a, b); 10233} 10234 10235// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %a, <2 x i32> %b) #0 { 10236// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b 10237// CHECK: ret <2 x i32> [[MUL_I]] 10238int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) { 10239 return vmul_s32(a, b); 10240} 10241 10242// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %a, <2 x float> %b) #0 { 10243// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b 10244// CHECK: ret <2 x 
float> [[MUL_I]] 10245float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) { 10246 return vmul_f32(a, b); 10247} 10248 10249// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %a, <8 x i8> %b) #0 { 10250// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b 10251// CHECK: ret <8 x i8> [[MUL_I]] 10252uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) { 10253 return vmul_u8(a, b); 10254} 10255 10256// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %a, <4 x i16> %b) #0 { 10257// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b 10258// CHECK: ret <4 x i16> [[MUL_I]] 10259uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) { 10260 return vmul_u16(a, b); 10261} 10262 10263// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %a, <2 x i32> %b) #0 { 10264// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b 10265// CHECK: ret <2 x i32> [[MUL_I]] 10266uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) { 10267 return vmul_u32(a, b); 10268} 10269 10270// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 10271// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b 10272// CHECK: ret <16 x i8> [[MUL_I]] 10273int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) { 10274 return vmulq_s8(a, b); 10275} 10276 10277// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 10278// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b 10279// CHECK: ret <8 x i16> [[MUL_I]] 10280int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) { 10281 return vmulq_s16(a, b); 10282} 10283 10284// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 10285// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b 10286// CHECK: ret <4 x i32> [[MUL_I]] 10287int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) { 10288 return vmulq_s32(a, b); 10289} 10290 10291// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) #0 { 10292// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b 10293// CHECK: ret <4 x float> [[MUL_I]] 10294float32x4_t 
test_vmulq_f32(float32x4_t a, float32x4_t b) { 10295 return vmulq_f32(a, b); 10296} 10297 10298// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 10299// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b 10300// CHECK: ret <16 x i8> [[MUL_I]] 10301uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) { 10302 return vmulq_u8(a, b); 10303} 10304 10305// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 10306// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b 10307// CHECK: ret <8 x i16> [[MUL_I]] 10308uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) { 10309 return vmulq_u16(a, b); 10310} 10311 10312// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 10313// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b 10314// CHECK: ret <4 x i32> [[MUL_I]] 10315uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) { 10316 return vmulq_u32(a, b); 10317} 10318 10319 10320// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 { 10321// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) #4 10322// CHECK: ret <8 x i16> [[VMULL_I]] 10323int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { 10324 return vmull_s8(a, b); 10325} 10326 10327// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { 10328// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 10329// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 10330// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 10331// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10332// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 10333// CHECK: ret <4 x i32> [[VMULL2_I]] 10334int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { 10335 return vmull_s16(a, b); 10336} 10337 10338// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { 10339// CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 10340// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 10341// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 10342// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10343// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 10344// CHECK: ret <2 x i64> [[VMULL2_I]] 10345int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { 10346 return vmull_s32(a, b); 10347} 10348 10349// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 { 10350// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) #4 10351// CHECK: ret <8 x i16> [[VMULL_I]] 10352uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { 10353 return vmull_u8(a, b); 10354} 10355 10356// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 { 10357// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 10358// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 10359// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 10360// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10361// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 10362// CHECK: ret <4 x i32> [[VMULL2_I]] 10363uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { 10364 return vmull_u16(a, b); 10365} 10366 10367// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 { 10368// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 10369// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 10370// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 10371// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10372// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 10373// CHECK: ret <2 x 
i64> [[VMULL2_I]] 10374uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { 10375 return vmull_u32(a, b); 10376} 10377 10378// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 { 10379// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) #4 10380// CHECK: ret <8 x i16> [[VMULL_I]] 10381poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { 10382 return vmull_p8(a, b); 10383} 10384 10385 10386// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { 10387// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 10388// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 10389// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 10390// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 10391// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10392// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 10393// CHECK: ret <4 x i32> [[VMULL2_I]] 10394int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) { 10395 return vmull_lane_s16(a, b, 3); 10396} 10397 10398// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { 10399// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 10400// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 10401// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 10402// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 10403// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10404// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 10405// CHECK: ret <2 x i64> [[VMULL2_I]] 10406int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) { 10407 return vmull_lane_s32(a, b, 1); 10408} 10409 10410// 
CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 { 10411// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 10412// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 10413// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 10414// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 10415// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10416// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 10417// CHECK: ret <4 x i32> [[VMULL2_I]] 10418uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) { 10419 return vmull_lane_u16(a, b, 3); 10420} 10421 10422// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 { 10423// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 10424// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 10425// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 10426// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 10427// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10428// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 10429// CHECK: ret <2 x i64> [[VMULL2_I]] 10430uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) { 10431 return vmull_lane_u32(a, b, 1); 10432} 10433 10434 10435// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 signext %b) #0 { 10436// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 10437// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 10438// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 10439// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 10440// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x 
i16> [[VECINIT2_I]], i16 %b, i32 3 10441// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 10442// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 10443// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10444// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4 10445// CHECK: ret <4 x i32> [[VMULL5_I]] 10446int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { 10447 return vmull_n_s16(a, b); 10448} 10449 10450// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 { 10451// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 10452// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 10453// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 10454// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 10455// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 10456// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10457// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4 10458// CHECK: ret <2 x i64> [[VMULL3_I]] 10459int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { 10460 return vmull_n_s32(a, b); 10461} 10462 10463// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 zeroext %b) #0 { 10464// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 10465// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 10466// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 10467// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 10468// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 10469// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 10470// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 10471// CHECK: 
[[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10472// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4 10473// CHECK: ret <4 x i32> [[VMULL5_I]] 10474uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { 10475 return vmull_n_u16(a, b); 10476} 10477 10478// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 { 10479// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 10480// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 10481// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 10482// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 10483// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 10484// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10485// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4 10486// CHECK: ret <2 x i64> [[VMULL3_I]] 10487uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { 10488 return vmull_n_u32(a, b); 10489} 10490 10491 10492// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %a, <8 x i8> %b) #0 { 10493// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 10494// CHECK: ret <8 x i8> [[VMUL_V_I]] 10495poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) { 10496 return vmul_p8(a, b); 10497} 10498 10499// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %a, <16 x i8> %b) #0 { 10500// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 10501// CHECK: ret <16 x i8> [[VMULQ_V_I]] 10502poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) { 10503 return vmulq_p8(a, b); 10504} 10505 10506 10507// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { 10508// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 
3, i32 3, i32 3, i32 3> 10509// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 10510// CHECK: ret <4 x i16> [[MUL]] 10511int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) { 10512 return vmul_lane_s16(a, b, 3); 10513} 10514 10515// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { 10516// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 10517// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 10518// CHECK: ret <2 x i32> [[MUL]] 10519int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) { 10520 return vmul_lane_s32(a, b, 1); 10521} 10522 10523// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %b) #0 { 10524// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <2 x i32> <i32 1, i32 1> 10525// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] 10526// CHECK: ret <2 x float> [[MUL]] 10527float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) { 10528 return vmul_lane_f32(a, b, 1); 10529} 10530 10531// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 { 10532// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 10533// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 10534// CHECK: ret <4 x i16> [[MUL]] 10535uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) { 10536 return vmul_lane_u16(a, b, 3); 10537} 10538 10539// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 { 10540// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 10541// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 10542// CHECK: ret <2 x i32> [[MUL]] 10543uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) { 10544 return vmul_lane_u32(a, b, 1); 10545} 10546 
// Lane-broadcast multiply tests for the 128-bit (q) forms: vmulq_lane_* lowers to a shufflevector splat of the chosen lane followed by a plain mul/fmul.
10547// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 { 10548// CHECK: [[SHUFFLE:%.*]] = 
shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 10549// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 10550// CHECK: ret <8 x i16> [[MUL]] 10551int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) { 10552 return vmulq_lane_s16(a, b, 3); 10553} 10554 10555// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 { 10556// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 10557// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 10558// CHECK: ret <4 x i32> [[MUL]] 10559int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) { 10560 return vmulq_lane_s32(a, b, 1); 10561} 10562 10563// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %b) #0 { 10564// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 10565// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] 10566// CHECK: ret <4 x float> [[MUL]] 10567float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) { 10568 return vmulq_lane_f32(a, b, 1); 10569} 10570 10571// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %b) #0 { 10572// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 10573// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 10574// CHECK: ret <8 x i16> [[MUL]] 10575uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) { 10576 return vmulq_lane_u16(a, b, 3); 10577} 10578 10579// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %b) #0 { 10580// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 10581// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 10582// CHECK: ret <4 x i32> [[MUL]] 10583uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) { 10584 return 
vmulq_lane_u32(a, b, 1); 10585} 10586 10587 
// Scalar-broadcast multiply tests: vmul_n_* / vmulq_n_* splat the scalar via a chain of insertelement, then multiply element-wise.
10588// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 signext %b) #0 { 10589// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 10590// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 10591// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 10592// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 10593// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]] 10594// CHECK: ret <4 x i16> [[MUL_I]] 10595int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) { 10596 return vmul_n_s16(a, b); 10597} 10598 10599// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 { 10600// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 10601// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 10602// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] 10603// CHECK: ret <2 x i32> [[MUL_I]] 10604int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) { 10605 return vmul_n_s32(a, b); 10606} 10607 10608// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 { 10609// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0 10610// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1 10611// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]] 10612// CHECK: ret <2 x float> [[MUL_I]] 10613float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { 10614 return vmul_n_f32(a, b); 10615} 10616 10617// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 zeroext %b) #0 { 10618// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 10619// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 10620// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 10621// CHECK: 
[[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 10622// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]] 10623// CHECK: ret <4 x i16> [[MUL_I]] 10624uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) { 10625 return vmul_n_u16(a, b); 10626} 10627 10628// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 { 10629// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 10630// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 10631// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] 10632// CHECK: ret <2 x i32> [[MUL_I]] 10633uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) { 10634 return vmul_n_u32(a, b); 10635} 10636 10637// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 signext %b) #0 { 10638// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 10639// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 10640// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 10641// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 10642// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 10643// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 10644// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 10645// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 10646// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]] 10647// CHECK: ret <8 x i16> [[MUL_I]] 10648int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) { 10649 return vmulq_n_s16(a, b); 10650} 10651 10652// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 { 10653// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 10654// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 
10655// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 10656// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 10657// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]] 10658// CHECK: ret <4 x i32> [[MUL_I]] 10659int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) { 10660 return vmulq_n_s32(a, b); 10661} 10662 10663// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 { 10664// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0 10665// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1 10666// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2 10667// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3 10668// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]] 10669// CHECK: ret <4 x float> [[MUL_I]] 10670float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { 10671 return vmulq_n_f32(a, b); 10672} 10673 10674// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 zeroext %b) #0 { 10675// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 10676// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 10677// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 10678// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 10679// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 10680// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 10681// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 10682// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 10683// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]] 10684// CHECK: ret <8 x i16> [[MUL_I]] 10685uint16x8_t 
test_vmulq_n_u16(uint16x8_t a, uint16_t b) { 10686 return vmulq_n_u16(a, b); 10687} 10688 10689// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 { 10690// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 10691// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 10692// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 10693// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 10694// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]] 10695// CHECK: ret <4 x i32> [[MUL_I]] 10696uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { 10697 return vmulq_n_u32(a, b); 10698} 10699 10700 
// Bitwise-NOT tests: vmvn_* / vmvnq_* lower to an IR xor with an all-ones vector.
10701// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 { 10702// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10703// CHECK: ret <8 x i8> [[NEG_I]] 10704int8x8_t test_vmvn_s8(int8x8_t a) { 10705 return vmvn_s8(a); 10706} 10707 10708// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 { 10709// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1> 10710// CHECK: ret <4 x i16> [[NEG_I]] 10711int16x4_t test_vmvn_s16(int16x4_t a) { 10712 return vmvn_s16(a); 10713} 10714 10715// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 { 10716// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1> 10717// CHECK: ret <2 x i32> [[NEG_I]] 10718int32x2_t test_vmvn_s32(int32x2_t a) { 10719 return vmvn_s32(a); 10720} 10721 10722// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 { 10723// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10724// CHECK: ret <8 x i8> [[NEG_I]] 10725uint8x8_t test_vmvn_u8(uint8x8_t a) { 10726 return vmvn_u8(a); 10727} 10728 10729// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 { 10730// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 
-1, i16 -1> 10731// CHECK: ret <4 x i16> [[NEG_I]] 10732uint16x4_t test_vmvn_u16(uint16x4_t a) { 10733 return vmvn_u16(a); 10734} 10735 10736// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 { 10737// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1> 10738// CHECK: ret <2 x i32> [[NEG_I]] 10739uint32x2_t test_vmvn_u32(uint32x2_t a) { 10740 return vmvn_u32(a); 10741} 10742 10743// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 { 10744// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10745// CHECK: ret <8 x i8> [[NEG_I]] 10746poly8x8_t test_vmvn_p8(poly8x8_t a) { 10747 return vmvn_p8(a); 10748} 10749 10750// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 { 10751// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10752// CHECK: ret <16 x i8> [[NEG_I]] 10753int8x16_t test_vmvnq_s8(int8x16_t a) { 10754 return vmvnq_s8(a); 10755} 10756 10757// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 { 10758// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 10759// CHECK: ret <8 x i16> [[NEG_I]] 10760int16x8_t test_vmvnq_s16(int16x8_t a) { 10761 return vmvnq_s16(a); 10762} 10763 10764// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 { 10765// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> 10766// CHECK: ret <4 x i32> [[NEG_I]] 10767int32x4_t test_vmvnq_s32(int32x4_t a) { 10768 return vmvnq_s32(a); 10769} 10770 10771// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 { 10772// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10773// CHECK: ret <16 x i8> [[NEG_I]] 10774uint8x16_t test_vmvnq_u8(uint8x16_t a) { 10775 return vmvnq_u8(a); 10776} 10777 
10778// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 { 10779// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 10780// CHECK: ret <8 x i16> [[NEG_I]] 10781uint16x8_t test_vmvnq_u16(uint16x8_t a) { 10782 return vmvnq_u16(a); 10783} 10784 10785// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 { 10786// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> 10787// CHECK: ret <4 x i32> [[NEG_I]] 10788uint32x4_t test_vmvnq_u32(uint32x4_t a) { 10789 return vmvnq_u32(a); 10790} 10791 10792// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 { 10793// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10794// CHECK: ret <16 x i8> [[NEG_I]] 10795poly8x16_t test_vmvnq_p8(poly8x16_t a) { 10796 return vmvnq_p8(a); 10797} 10798 10799 
// Negation tests: integer vneg_* lowers to 'sub zeroinitializer, %a'; float forms use 'fsub -0.0, %a' to preserve signed-zero semantics.
10800// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 { 10801// CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a 10802// CHECK: ret <8 x i8> [[SUB_I]] 10803int8x8_t test_vneg_s8(int8x8_t a) { 10804 return vneg_s8(a); 10805} 10806 10807// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 { 10808// CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a 10809// CHECK: ret <4 x i16> [[SUB_I]] 10810int16x4_t test_vneg_s16(int16x4_t a) { 10811 return vneg_s16(a); 10812} 10813 10814// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 { 10815// CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a 10816// CHECK: ret <2 x i32> [[SUB_I]] 10817int32x2_t test_vneg_s32(int32x2_t a) { 10818 return vneg_s32(a); 10819} 10820 10821// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 { 10822// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a 10823// CHECK: ret <2 x float> [[SUB_I]] 10824float32x2_t test_vneg_f32(float32x2_t a) { 10825 return 
vneg_f32(a); 10826} 10827 10828// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 { 10829// CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a 10830// CHECK: ret <16 x i8> [[SUB_I]] 10831int8x16_t test_vnegq_s8(int8x16_t a) { 10832 return vnegq_s8(a); 10833} 10834 10835// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 { 10836// CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a 10837// CHECK: ret <8 x i16> [[SUB_I]] 10838int16x8_t test_vnegq_s16(int16x8_t a) { 10839 return vnegq_s16(a); 10840} 10841 10842// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 { 10843// CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a 10844// CHECK: ret <4 x i32> [[SUB_I]] 10845int32x4_t test_vnegq_s32(int32x4_t a) { 10846 return vnegq_s32(a); 10847} 10848 10849// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 { 10850// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 10851// CHECK: ret <4 x float> [[SUB_I]] 10852float32x4_t test_vnegq_f32(float32x4_t a) { 10853 return vnegq_f32(a); 10854} 10855 10856 
// OR-NOT tests: vorn_* lowers to 'or %a, (xor %b, all-ones)' -- the second operand is the one inverted.
10857// CHECK-LABEL: define <8 x i8> @test_vorn_s8(<8 x i8> %a, <8 x i8> %b) #0 { 10858// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10859// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] 10860// CHECK: ret <8 x i8> [[OR_I]] 10861int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { 10862 return vorn_s8(a, b); 10863} 10864 10865// CHECK-LABEL: define <4 x i16> @test_vorn_s16(<4 x i16> %a, <4 x i16> %b) #0 { 10866// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1> 10867// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] 10868// CHECK: ret <4 x i16> [[OR_I]] 10869int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { 10870 return vorn_s16(a, b); 10871} 10872 10873// CHECK-LABEL: define <2 x i32> @test_vorn_s32(<2 x i32> %a, <2 x i32> %b) #0 { 10874// CHECK: 
[[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1> 10875// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] 10876// CHECK: ret <2 x i32> [[OR_I]] 10877int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { 10878 return vorn_s32(a, b); 10879} 10880 10881// CHECK-LABEL: define <1 x i64> @test_vorn_s64(<1 x i64> %a, <1 x i64> %b) #0 { 10882// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1> 10883// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] 10884// CHECK: ret <1 x i64> [[OR_I]] 10885int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { 10886 return vorn_s64(a, b); 10887} 10888 10889// CHECK-LABEL: define <8 x i8> @test_vorn_u8(<8 x i8> %a, <8 x i8> %b) #0 { 10890// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10891// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] 10892// CHECK: ret <8 x i8> [[OR_I]] 10893uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { 10894 return vorn_u8(a, b); 10895} 10896 10897// CHECK-LABEL: define <4 x i16> @test_vorn_u16(<4 x i16> %a, <4 x i16> %b) #0 { 10898// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1> 10899// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] 10900// CHECK: ret <4 x i16> [[OR_I]] 10901uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { 10902 return vorn_u16(a, b); 10903} 10904 10905// CHECK-LABEL: define <2 x i32> @test_vorn_u32(<2 x i32> %a, <2 x i32> %b) #0 { 10906// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1> 10907// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] 10908// CHECK: ret <2 x i32> [[OR_I]] 10909uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { 10910 return vorn_u32(a, b); 10911} 10912 10913// CHECK-LABEL: define <1 x i64> @test_vorn_u64(<1 x i64> %a, <1 x i64> %b) #0 { 10914// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1> 10915// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] 10916// CHECK: ret <1 x i64> [[OR_I]] 10917uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { 10918 return vorn_u64(a, 
b); 10919} 10920 10921// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 10922// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10923// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] 10924// CHECK: ret <16 x i8> [[OR_I]] 10925int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { 10926 return vornq_s8(a, b); 10927} 10928 10929// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 10930// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 10931// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] 10932// CHECK: ret <8 x i16> [[OR_I]] 10933int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { 10934 return vornq_s16(a, b); 10935} 10936 10937// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 10938// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> 10939// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] 10940// CHECK: ret <4 x i32> [[OR_I]] 10941int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { 10942 return vornq_s32(a, b); 10943} 10944 10945// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 10946// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1> 10947// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] 10948// CHECK: ret <2 x i64> [[OR_I]] 10949int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { 10950 return vornq_s64(a, b); 10951} 10952 10953// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 10954// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 10955// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] 10956// CHECK: ret <16 x i8> [[OR_I]] 10957uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { 10958 return vornq_u8(a, 
b); 10959} 10960 10961// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 10962// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 10963// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] 10964// CHECK: ret <8 x i16> [[OR_I]] 10965uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { 10966 return vornq_u16(a, b); 10967} 10968 10969// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 10970// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> 10971// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] 10972// CHECK: ret <4 x i32> [[OR_I]] 10973uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { 10974 return vornq_u32(a, b); 10975} 10976 10977// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 10978// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1> 10979// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] 10980// CHECK: ret <2 x i64> [[OR_I]] 10981uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { 10982 return vornq_u64(a, b); 10983} 10984 10985 
// Bitwise-OR tests: vorr_* / vorrq_* lower directly to a plain IR 'or' of the two operands.
10986// CHECK-LABEL: define <8 x i8> @test_vorr_s8(<8 x i8> %a, <8 x i8> %b) #0 { 10987// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b 10988// CHECK: ret <8 x i8> [[OR_I]] 10989int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) { 10990 return vorr_s8(a, b); 10991} 10992 10993// CHECK-LABEL: define <4 x i16> @test_vorr_s16(<4 x i16> %a, <4 x i16> %b) #0 { 10994// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b 10995// CHECK: ret <4 x i16> [[OR_I]] 10996int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) { 10997 return vorr_s16(a, b); 10998} 10999 11000// CHECK-LABEL: define <2 x i32> @test_vorr_s32(<2 x i32> %a, <2 x i32> %b) #0 { 11001// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b 11002// CHECK: ret <2 x i32> [[OR_I]] 11003int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) { 11004 return vorr_s32(a, b); 11005} 11006 11007// CHECK-LABEL: define <1 x i64> 
@test_vorr_s64(<1 x i64> %a, <1 x i64> %b) #0 { 11008// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b 11009// CHECK: ret <1 x i64> [[OR_I]] 11010int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) { 11011 return vorr_s64(a, b); 11012} 11013 11014// CHECK-LABEL: define <8 x i8> @test_vorr_u8(<8 x i8> %a, <8 x i8> %b) #0 { 11015// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b 11016// CHECK: ret <8 x i8> [[OR_I]] 11017uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) { 11018 return vorr_u8(a, b); 11019} 11020 11021// CHECK-LABEL: define <4 x i16> @test_vorr_u16(<4 x i16> %a, <4 x i16> %b) #0 { 11022// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b 11023// CHECK: ret <4 x i16> [[OR_I]] 11024uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) { 11025 return vorr_u16(a, b); 11026} 11027 11028// CHECK-LABEL: define <2 x i32> @test_vorr_u32(<2 x i32> %a, <2 x i32> %b) #0 { 11029// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b 11030// CHECK: ret <2 x i32> [[OR_I]] 11031uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) { 11032 return vorr_u32(a, b); 11033} 11034 11035// CHECK-LABEL: define <1 x i64> @test_vorr_u64(<1 x i64> %a, <1 x i64> %b) #0 { 11036// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b 11037// CHECK: ret <1 x i64> [[OR_I]] 11038uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) { 11039 return vorr_u64(a, b); 11040} 11041 11042// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 11043// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b 11044// CHECK: ret <16 x i8> [[OR_I]] 11045int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) { 11046 return vorrq_s8(a, b); 11047} 11048 11049// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 11050// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b 11051// CHECK: ret <8 x i16> [[OR_I]] 11052int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) { 11053 return vorrq_s16(a, b); 11054} 11055 11056// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 11057// CHECK: [[OR_I:%.*]] = or <4 
x i32> %a, %b 11058// CHECK: ret <4 x i32> [[OR_I]] 11059int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) { 11060 return vorrq_s32(a, b); 11061} 11062 11063// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 11064// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b 11065// CHECK: ret <2 x i64> [[OR_I]] 11066int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) { 11067 return vorrq_s64(a, b); 11068} 11069 11070// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 11071// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b 11072// CHECK: ret <16 x i8> [[OR_I]] 11073uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) { 11074 return vorrq_u8(a, b); 11075} 11076 11077// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 11078// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b 11079// CHECK: ret <8 x i16> [[OR_I]] 11080uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) { 11081 return vorrq_u16(a, b); 11082} 11083 11084// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 11085// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b 11086// CHECK: ret <4 x i32> [[OR_I]] 11087uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) { 11088 return vorrq_u32(a, b); 11089} 11090 11091// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 11092// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b 11093// CHECK: ret <2 x i64> [[OR_I]] 11094uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) { 11095 return vorrq_u64(a, b); 11096} 11097 11098 
// Pairwise add-and-accumulate tests: vpadal_* call the widening llvm.arm.neon.vpadals/vpadalu intrinsics (accumulator element type is twice the input width).
11099// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 { 11100// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11101// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11102// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4 11103// CHECK: ret <4 x i16> [[VPADAL_V1_I]] 11104int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) 
{ 11105 return vpadal_s8(a, b); 11106} 11107 11108// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 { 11109// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11110// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11111// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11112// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11113// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4 11114// CHECK: ret <2 x i32> [[VPADAL_V2_I]] 11115int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { 11116 return vpadal_s16(a, b); 11117} 11118 11119// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 { 11120// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 11121// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11122// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 11123// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11124// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4 11125// CHECK: ret <1 x i64> [[VPADAL_V2_I]] 11126int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { 11127 return vpadal_s32(a, b); 11128} 11129 11130// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 { 11131// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11132// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11133// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4 11134// CHECK: ret <4 x i16> [[VPADAL_V1_I]] 11135uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { 11136 return vpadal_u8(a, b); 11137} 11138 11139// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 { 11140// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to 
<8 x i8> 11141// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11142// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11143// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11144// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4 11145// CHECK: ret <2 x i32> [[VPADAL_V2_I]] 11146uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { 11147 return vpadal_u16(a, b); 11148} 11149 11150// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 { 11151// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 11152// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11153// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 11154// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11155// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4 11156// CHECK: ret <1 x i64> [[VPADAL_V2_I]] 11157uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { 11158 return vpadal_u32(a, b); 11159} 11160 
// 128-bit pairwise add-and-accumulate tests: vpadalq_* (same intrinsics, quad-register operand widths).
11161// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 { 11162// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 11163// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 11164// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4 11165// CHECK: ret <8 x i16> [[VPADALQ_V1_I]] 11166int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { 11167 return vpadalq_s8(a, b); 11168} 11169 11170// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 { 11171// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11172// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 11173// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11174// CHECK: 
[[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11175// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4 11176// CHECK: ret <4 x i32> [[VPADALQ_V2_I]] 11177int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { 11178 return vpadalq_s16(a, b); 11179} 11180 11181// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 { 11182// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11183// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 11184// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11185// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 11186// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4 11187// CHECK: ret <2 x i64> [[VPADALQ_V2_I]] 11188int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { 11189 return vpadalq_s32(a, b); 11190} 11191 11192// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 { 11193// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 11194// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 11195// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4 11196// CHECK: ret <8 x i16> [[VPADALQ_V1_I]] 11197uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { 11198 return vpadalq_u8(a, b); 11199} 11200 11201// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 { 11202// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11203// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 11204// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11205// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11206// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> 
@llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4 11207// CHECK: ret <4 x i32> [[VPADALQ_V2_I]] 11208uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { 11209 return vpadalq_u16(a, b); 11210} 11211 11212// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 { 11213// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11214// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 11215// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11216// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 11217// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4 11218// CHECK: ret <2 x i64> [[VPADALQ_V2_I]] 11219uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { 11220 return vpadalq_u32(a, b); 11221} 11222 11223 
// Pairwise add tests (non-accumulating): vpadd_* call llvm.arm.neon.vpadd; one intrinsic serves both signednesses since the result does not widen.
11224// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { 11225// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11226// CHECK: ret <8 x i8> [[VPADD_V_I]] 11227int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { 11228 return vpadd_s8(a, b); 11229} 11230 11231// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { 11232// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11233// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11234// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11235// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11236// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 11237// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> 11238// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> 11239// CHECK: ret <4 x i16> [[TMP2]] 11240int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { 
11241 return vpadd_s16(a, b); 11242} 11243 11244// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { 11245// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11246// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11247// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11248// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11249// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 11250// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> 11251// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> 11252// CHECK: ret <2 x i32> [[TMP2]] 11253int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { 11254 return vpadd_s32(a, b); 11255} 11256 11257// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { 11258// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11259// CHECK: ret <8 x i8> [[VPADD_V_I]] 11260uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { 11261 return vpadd_u8(a, b); 11262} 11263 11264// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { 11265// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11266// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11267// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11268// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11269// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 11270// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> 11271// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> 11272// CHECK: ret <4 x i16> [[TMP2]] 11273uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { 11274 return vpadd_u16(a, b); 11275} 11276 11277// CHECK-LABEL: define <2 x i32> 
@test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 11278// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11279// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11280// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11281// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11282// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 11283// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> 11284// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> 11285// CHECK: ret <2 x i32> [[TMP2]] 11286uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { 11287 return vpadd_u32(a, b); 11288} 11289 11290// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 { 11291// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 11292// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 11293// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 11294// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 11295// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4 11296// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> 11297// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float> 11298// CHECK: ret <2 x float> [[TMP2]] 11299float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { 11300 return vpadd_f32(a, b); 11301} 11302 11303 11304// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 { 11305// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4 11306// CHECK: ret <4 x i16> [[VPADDL_I]] 11307int16x4_t test_vpaddl_s8(int8x8_t a) { 11308 return vpaddl_s8(a); 11309} 11310 11311// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 { 11312// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> 
%a to <8 x i8> 11313// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11314// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4 11315// CHECK: ret <2 x i32> [[VPADDL1_I]] 11316int32x2_t test_vpaddl_s16(int16x4_t a) { 11317 return vpaddl_s16(a); 11318} 11319 11320// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 { 11321// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11322// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11323// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4 11324// CHECK: ret <1 x i64> [[VPADDL1_I]] 11325int64x1_t test_vpaddl_s32(int32x2_t a) { 11326 return vpaddl_s32(a); 11327} 11328 11329// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 { 11330// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4 11331// CHECK: ret <4 x i16> [[VPADDL_I]] 11332uint16x4_t test_vpaddl_u8(uint8x8_t a) { 11333 return vpaddl_u8(a); 11334} 11335 11336// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 { 11337// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11338// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11339// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4 11340// CHECK: ret <2 x i32> [[VPADDL1_I]] 11341uint32x2_t test_vpaddl_u16(uint16x4_t a) { 11342 return vpaddl_u16(a); 11343} 11344 11345// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 { 11346// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11347// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11348// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4 11349// CHECK: ret <1 x i64> [[VPADDL1_I]] 11350uint64x1_t test_vpaddl_u32(uint32x2_t a) { 11351 return vpaddl_u32(a); 11352} 11353 11354// 
CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 { 11355// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4 11356// CHECK: ret <8 x i16> [[VPADDL_I]] 11357int16x8_t test_vpaddlq_s8(int8x16_t a) { 11358 return vpaddlq_s8(a); 11359} 11360 11361// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 { 11362// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 11363// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 11364// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4 11365// CHECK: ret <4 x i32> [[VPADDL1_I]] 11366int32x4_t test_vpaddlq_s16(int16x8_t a) { 11367 return vpaddlq_s16(a); 11368} 11369 11370// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 { 11371// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11372// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11373// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4 11374// CHECK: ret <2 x i64> [[VPADDL1_I]] 11375int64x2_t test_vpaddlq_s32(int32x4_t a) { 11376 return vpaddlq_s32(a); 11377} 11378 11379// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 { 11380// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4 11381// CHECK: ret <8 x i16> [[VPADDL_I]] 11382uint16x8_t test_vpaddlq_u8(uint8x16_t a) { 11383 return vpaddlq_u8(a); 11384} 11385 11386// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 { 11387// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 11388// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 11389// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4 11390// CHECK: ret <4 x i32> [[VPADDL1_I]] 11391uint32x4_t test_vpaddlq_u16(uint16x8_t a) { 11392 return vpaddlq_u16(a); 11393} 11394 11395// CHECK-LABEL: 
define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 { 11396// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11397// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11398// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4 11399// CHECK: ret <2 x i64> [[VPADDL1_I]] 11400uint64x2_t test_vpaddlq_u32(uint32x4_t a) { 11401 return vpaddlq_u32(a); 11402} 11403 11404 11405// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 { 11406// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11407// CHECK: ret <8 x i8> [[VPMAX_V_I]] 11408int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { 11409 return vpmax_s8(a, b); 11410} 11411 11412// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 { 11413// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11414// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11415// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11416// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11417// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4 11418// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8> 11419// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16> 11420// CHECK: ret <4 x i16> [[TMP2]] 11421int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { 11422 return vpmax_s16(a, b); 11423} 11424 11425// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 { 11426// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11427// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11428// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11429// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11430// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> 
@llvm.arm.neon.vpmaxs.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4 11431// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8> 11432// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32> 11433// CHECK: ret <2 x i32> [[TMP2]] 11434int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { 11435 return vpmax_s32(a, b); 11436} 11437 11438// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 { 11439// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11440// CHECK: ret <8 x i8> [[VPMAX_V_I]] 11441uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { 11442 return vpmax_u8(a, b); 11443} 11444 11445// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 { 11446// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11447// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11448// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11449// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11450// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4 11451// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8> 11452// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16> 11453// CHECK: ret <4 x i16> [[TMP2]] 11454uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { 11455 return vpmax_u16(a, b); 11456} 11457 11458// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 { 11459// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11460// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11461// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11462// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11463// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4 
11464// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8> 11465// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32> 11466// CHECK: ret <2 x i32> [[TMP2]] 11467uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { 11468 return vpmax_u32(a, b); 11469} 11470 11471// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 { 11472// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 11473// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 11474// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 11475// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 11476// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> [[VPMAX_V_I]], <2 x float> [[VPMAX_V1_I]]) #4 11477// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8> 11478// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x float> 11479// CHECK: ret <2 x float> [[TMP2]] 11480float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { 11481 return vpmax_f32(a, b); 11482} 11483 11484 11485// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 { 11486// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11487// CHECK: ret <8 x i8> [[VPMIN_V_I]] 11488int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { 11489 return vpmin_s8(a, b); 11490} 11491 11492// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 { 11493// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11494// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11495// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11496// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11497// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4 11498// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x 
i16> [[VPMIN_V2_I]] to <8 x i8> 11499// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16> 11500// CHECK: ret <4 x i16> [[TMP2]] 11501int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { 11502 return vpmin_s16(a, b); 11503} 11504 11505// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 { 11506// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11507// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11508// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11509// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11510// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4 11511// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8> 11512// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32> 11513// CHECK: ret <2 x i32> [[TMP2]] 11514int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { 11515 return vpmin_s32(a, b); 11516} 11517 11518// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 { 11519// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11520// CHECK: ret <8 x i8> [[VPMIN_V_I]] 11521uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { 11522 return vpmin_u8(a, b); 11523} 11524 11525// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 { 11526// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11527// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11528// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11529// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11530// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4 11531// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8> 11532// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> 
[[VPMIN_V3_I]] to <4 x i16> 11533// CHECK: ret <4 x i16> [[TMP2]] 11534uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { 11535 return vpmin_u16(a, b); 11536} 11537 11538// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 { 11539// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11540// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11541// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11542// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11543// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4 11544// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8> 11545// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32> 11546// CHECK: ret <2 x i32> [[TMP2]] 11547uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { 11548 return vpmin_u32(a, b); 11549} 11550 11551// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 { 11552// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 11553// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 11554// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 11555// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 11556// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> [[VPMIN_V_I]], <2 x float> [[VPMIN_V1_I]]) #4 11557// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8> 11558// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x float> 11559// CHECK: ret <2 x float> [[TMP2]] 11560float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { 11561 return vpmin_f32(a, b); 11562} 11563 11564 11565// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 { 11566// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4 11567// CHECK: ret <8 x i8> [[VQABS_V_I]] 
11568int8x8_t test_vqabs_s8(int8x8_t a) { 11569 return vqabs_s8(a); 11570} 11571 11572// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 { 11573// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11574// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11575// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #4 11576// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8> 11577// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16> 11578// CHECK: ret <4 x i16> [[TMP1]] 11579int16x4_t test_vqabs_s16(int16x4_t a) { 11580 return vqabs_s16(a); 11581} 11582 11583// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 { 11584// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11585// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11586// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #4 11587// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8> 11588// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32> 11589// CHECK: ret <2 x i32> [[TMP1]] 11590int32x2_t test_vqabs_s32(int32x2_t a) { 11591 return vqabs_s32(a); 11592} 11593 11594// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 { 11595// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4 11596// CHECK: ret <16 x i8> [[VQABSQ_V_I]] 11597int8x16_t test_vqabsq_s8(int8x16_t a) { 11598 return vqabsq_s8(a); 11599} 11600 11601// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 { 11602// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 11603// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 11604// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #4 11605// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8> 11606// CHECK: 
[[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16> 11607// CHECK: ret <8 x i16> [[TMP1]] 11608int16x8_t test_vqabsq_s16(int16x8_t a) { 11609 return vqabsq_s16(a); 11610} 11611 11612// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 { 11613// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11614// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11615// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #4 11616// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8> 11617// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32> 11618// CHECK: ret <4 x i32> [[TMP1]] 11619int32x4_t test_vqabsq_s32(int32x4_t a) { 11620 return vqabsq_s32(a); 11621} 11622 11623 11624// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { 11625// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11626// CHECK: ret <8 x i8> [[VQADD_V_I]] 11627int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { 11628 return vqadd_s8(a, b); 11629} 11630 11631// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { 11632// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11633// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11634// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11635// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11636// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4 11637// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> 11638// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16> 11639// CHECK: ret <4 x i16> [[TMP2]] 11640int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { 11641 return vqadd_s16(a, b); 11642} 11643 11644// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) 
#0 { 11645// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11646// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11647// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11648// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11649// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4 11650// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> 11651// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32> 11652// CHECK: ret <2 x i32> [[TMP2]] 11653int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { 11654 return vqadd_s32(a, b); 11655} 11656 11657// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 { 11658// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 11659// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 11660// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 11661// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 11662// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4 11663// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> 11664// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64> 11665// CHECK: ret <1 x i64> [[TMP2]] 11666int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { 11667 return vqadd_s64(a, b); 11668} 11669 11670// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { 11671// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 11672// CHECK: ret <8 x i8> [[VQADD_V_I]] 11673uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { 11674 return vqadd_u8(a, b); 11675} 11676 11677// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { 11678// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 11679// CHECK: 
[[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11680// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 11681// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11682// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4 11683// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> 11684// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16> 11685// CHECK: ret <4 x i16> [[TMP2]] 11686uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { 11687 return vqadd_u16(a, b); 11688} 11689 11690// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 11691// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 11692// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11693// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 11694// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11695// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4 11696// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> 11697// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32> 11698// CHECK: ret <2 x i32> [[TMP2]] 11699uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { 11700 return vqadd_u32(a, b); 11701} 11702 11703// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 { 11704// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 11705// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 11706// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 11707// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 11708// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4 11709// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> 
11710// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64> 11711// CHECK: ret <1 x i64> [[TMP2]] 11712uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { 11713 return vqadd_u64(a, b); 11714} 11715 11716// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 11717// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 11718// CHECK: ret <16 x i8> [[VQADDQ_V_I]] 11719int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { 11720 return vqaddq_s8(a, b); 11721} 11722 11723// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 11724// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 11725// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 11726// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 11727// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11728// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4 11729// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> 11730// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16> 11731// CHECK: ret <8 x i16> [[TMP2]] 11732int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { 11733 return vqaddq_s16(a, b); 11734} 11735 11736// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 11737// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11738// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 11739// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11740// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 11741// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4 11742// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> 11743// CHECK: [[TMP2:%.*]] = bitcast <16 x 
i8> [[VQADDQ_V3_I]] to <4 x i32> 11744// CHECK: ret <4 x i32> [[TMP2]] 11745int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { 11746 return vqaddq_s32(a, b); 11747} 11748 11749// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 11750// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11751// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 11752// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11753// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 11754// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4 11755// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> 11756// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64> 11757// CHECK: ret <2 x i64> [[TMP2]] 11758int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { 11759 return vqaddq_s64(a, b); 11760} 11761 11762// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 11763// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 11764// CHECK: ret <16 x i8> [[VQADDQ_V_I]] 11765uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { 11766 return vqaddq_u8(a, b); 11767} 11768 11769// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 11770// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 11771// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 11772// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 11773// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11774// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4 11775// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> 11776// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16> 11777// 
CHECK: ret <8 x i16> [[TMP2]] 11778uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { 11779 return vqaddq_u16(a, b); 11780} 11781 11782// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 11783// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11784// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 11785// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11786// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 11787// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4 11788// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> 11789// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32> 11790// CHECK: ret <4 x i32> [[TMP2]] 11791uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { 11792 return vqaddq_u32(a, b); 11793} 11794 11795// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 11796// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11797// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 11798// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11799// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 11800// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4 11801// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> 11802// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64> 11803// CHECK: ret <2 x i64> [[TMP2]] 11804uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { 11805 return vqaddq_u64(a, b); 11806} 11807 11808 11809// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 11810// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11811// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11812// 
CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 11813// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11814// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11815// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 11816// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11817// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 11818// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 11819int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 11820 return vqdmlal_s16(a, b, c); 11821} 11822 11823// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 11824// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11825// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11826// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 11827// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11828// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11829// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 11830// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11831// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 11832// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 11833int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 11834 return vqdmlal_s32(a, b, c); 11835} 11836 11837 11838// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 11839// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 11840// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11841// CHECK: [[TMP1:%.*]] = 
bitcast <4 x i16> %b to <8 x i8> 11842// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 11843// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11844// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11845// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 11846// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11847// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 11848// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 11849int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 11850 return vqdmlal_lane_s16(a, b, c, 3); 11851} 11852 11853// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 11854// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 11855// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11856// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11857// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 11858// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11859// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11860// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 11861// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11862// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 11863// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 11864int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 11865 return vqdmlal_lane_s32(a, b, c, 1); 11866} 11867 11868 11869// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { 11870// CHECK: 
[[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11871// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11872// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 11873// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 11874// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 11875// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 11876// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 11877// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11878// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11879// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4 11880// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11881// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4 11882// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] 11883int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 11884 return vqdmlal_n_s16(a, b, c); 11885} 11886 11887// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 11888// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11889// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11890// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 11891// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 11892// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 11893// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11894// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11895// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4 11896// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast 
<16 x i8> [[TMP0]] to <2 x i64> 11897// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4 11898// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] 11899int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 11900 return vqdmlal_n_s32(a, b, c); 11901} 11902 11903 11904// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 11905// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11906// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11907// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 11908// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11909// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11910// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 11911// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11912// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 11913// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 11914int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 11915 return vqdmlsl_s16(a, b, c); 11916} 11917 11918// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 11919// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11920// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11921// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 11922// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11923// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11924// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 11925// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11926// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 
x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 11927// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 11928int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 11929 return vqdmlsl_s32(a, b, c); 11930} 11931 11932 11933// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 11934// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 11935// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11936// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11937// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 11938// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11939// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11940// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 11941// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11942// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 11943// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 11944int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 11945 return vqdmlsl_lane_s16(a, b, c, 3); 11946} 11947 11948// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 11949// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 11950// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11951// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11952// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 11953// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11954// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11955// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> 
@llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 11956// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11957// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 11958// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 11959int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 11960 return vqdmlsl_lane_s32(a, b, c, 1); 11961} 11962 11963 11964// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { 11965// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11966// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11967// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 11968// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 11969// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 11970// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 11971// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 11972// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11973// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11974// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4 11975// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11976// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4 11977// CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] 11978int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 11979 return vqdmlsl_n_s16(a, b, c); 11980} 11981 11982// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 11983// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11984// CHECK: [[TMP1:%.*]] = 
bitcast <2 x i32> %b to <8 x i8> 11985// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 11986// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 11987// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 11988// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11989// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11990// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4 11991// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11992// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4 11993// CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] 11994int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 11995 return vqdmlsl_n_s32(a, b, c); 11996} 11997 11998 11999// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12000// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12001// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12002// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12003// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12004// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 12005// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 12006// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 12007// CHECK: ret <4 x i16> [[TMP2]] 12008int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { 12009 return vqdmulh_s16(a, b); 12010} 12011 12012// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12013// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12014// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12015// CHECK: 
[[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12016// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12017// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4 12018// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 12019// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 12020// CHECK: ret <2 x i32> [[TMP2]] 12021int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { 12022 return vqdmulh_s32(a, b); 12023} 12024 12025// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 12026// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12027// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 12028// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12029// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12030// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4 12031// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 12032// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 12033// CHECK: ret <8 x i16> [[TMP2]] 12034int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { 12035 return vqdmulhq_s16(a, b); 12036} 12037 12038// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 12039// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12040// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 12041// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12042// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12043// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4 12044// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> 
[[VQDMULHQ_V2_I]] to <16 x i8> 12045// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 12046// CHECK: ret <4 x i32> [[TMP2]] 12047int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { 12048 return vqdmulhq_s32(a, b); 12049} 12050 12051 12052// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12053// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 12054// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12055// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 12056// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12057// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12058// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 12059// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 12060// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 12061// CHECK: ret <4 x i16> [[TMP2]] 12062int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) { 12063 return vqdmulh_lane_s16(a, b, 3); 12064} 12065 12066// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12067// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 12068// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12069// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 12070// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12071// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12072// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4 12073// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 12074// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 
12075// CHECK: ret <2 x i32> [[TMP2]] 12076int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) { 12077 return vqdmulh_lane_s32(a, b, 1); 12078} 12079 12080// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 { 12081// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 12082// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12083// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 12084// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12085// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12086// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4 12087// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 12088// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 12089// CHECK: ret <8 x i16> [[TMP2]] 12090int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) { 12091 return vqdmulhq_lane_s16(a, b, 3); 12092} 12093 12094// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 { 12095// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 12096// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12097// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 12098// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12099// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12100// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4 12101// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 12102// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 12103// CHECK: ret <4 x i32> 
[[TMP2]] 12104int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) { 12105 return vqdmulhq_lane_s32(a, b, 1); 12106} 12107 12108 12109// CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 { 12110// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12111// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 12112// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 12113// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 12114// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 12115// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 12116// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12117// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12118// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #4 12119// CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8> 12120// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16> 12121// CHECK: ret <4 x i16> [[TMP2]] 12122int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { 12123 return vqdmulh_n_s16(a, b); 12124} 12125 12126// CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 { 12127// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12128// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 12129// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 12130// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 12131// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12132// CHECK: [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12133// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #4 
12134// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8> 12135// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32> 12136// CHECK: ret <2 x i32> [[TMP2]] 12137int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { 12138 return vqdmulh_n_s32(a, b); 12139} 12140 12141// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 { 12142// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12143// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 12144// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 12145// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 12146// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 12147// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 12148// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 12149// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 12150// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 12151// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> 12152// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12153// CHECK: [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12154// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #4 12155// CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8> 12156// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16> 12157// CHECK: ret <8 x i16> [[TMP2]] 12158int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { 12159 return vqdmulhq_n_s16(a, b); 12160} 12161 12162// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 { 12163// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> 
%a to <16 x i8> 12164// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 12165// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 12166// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 12167// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 12168// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> 12169// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12170// CHECK: [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12171// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #4 12172// CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8> 12173// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32> 12174// CHECK: ret <4 x i32> [[TMP2]] 12175int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { 12176 return vqdmulhq_n_s32(a, b); 12177} 12178 12179 12180// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12181// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12182// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12183// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12184// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12185// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4 12186// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 12187// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 12188// CHECK: ret <4 x i32> [[TMP2]] 12189int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { 12190 return vqdmull_s16(a, b); 12191} 12192 12193// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12194// CHECK: [[TMP0:%.*]] = 
bitcast <2 x i32> %a to <8 x i8> 12195// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12196// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12197// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12198// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4 12199// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 12200// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 12201// CHECK: ret <2 x i64> [[TMP2]] 12202int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { 12203 return vqdmull_s32(a, b); 12204} 12205 12206 12207// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12208// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 12209// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12210// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 12211// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12212// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12213// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4 12214// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 12215// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 12216// CHECK: ret <4 x i32> [[TMP2]] 12217int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) { 12218 return vqdmull_lane_s16(a, b, 3); 12219} 12220 12221// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12222// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 12223// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12224// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 12225// 
CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12226// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12227// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4 12228// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 12229// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 12230// CHECK: ret <2 x i64> [[TMP2]] 12231int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) { 12232 return vqdmull_lane_s32(a, b, 1); 12233} 12234 12235 12236// CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 signext %b) #0 { 12237// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12238// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 12239// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 12240// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 12241// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 12242// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 12243// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12244// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12245// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #4 12246// CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8> 12247// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32> 12248// CHECK: ret <4 x i32> [[TMP2]] 12249int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { 12250 return vqdmull_n_s16(a, b); 12251} 12252 12253// CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 { 12254// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12255// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> 
undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #4
// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
  return vqdmull_n_s32(a, b);
}


// NOTE(review): The CHECK/CHECK-LABEL comments throughout this file are
// FileCheck test expectations (auto-generated against the clang -emit-llvm
// output named in the RUN line); they are functional test content, not
// documentation, and must not be hand-edited except by regenerating them.

// vqmovn_*: each test pins the lowering to @llvm.arm.neon.vqmovns (signed)
// or @llvm.arm.neon.vqmovnu (unsigned) plus the surrounding bitcasts.
// CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
int8x8_t test_vqmovn_s16(int16x8_t a) {
  return vqmovn_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqmovn_s32(int32x4_t a) {
  return vqmovn_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqmovn_s64(int64x2_t a) {
  return vqmovn_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
  return vqmovn_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
  return vqmovn_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
  return vqmovn_u64(a);
}


// vqmovun_*: lowering pinned to @llvm.arm.neon.vqmovnsu.
// CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #4
// CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
uint8x8_t test_vqmovun_s16(int16x8_t a) {
  return vqmovun_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #4
// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vqmovun_s32(int32x4_t a) {
  return vqmovun_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #4
// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vqmovun_s64(int64x2_t a) {
  return vqmovun_s64(a);
}


// vqneg_*/vqnegq_*: lowering pinned to @llvm.arm.neon.vqneg.
// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VQNEG_V_I]]
int8x8_t test_vqneg_s8(int8x8_t a) {
  return vqneg_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #4
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqneg_s16(int16x4_t a) {
  return vqneg_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #4
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqneg_s32(int32x2_t a) {
  return vqneg_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
int8x16_t test_vqnegq_s8(int8x16_t a) {
  return vqnegq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #4
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vqnegq_s16(int16x8_t a) {
  return vqnegq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #4
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vqnegq_s32(int32x4_t a) {
  return vqnegq_s32(a);
}


// vqrdmulh_*/vqrdmulhq_*: lowering pinned to @llvm.arm.neon.vqrdmulh.
// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}


// vqrdmulh[q]_lane_*: the lane argument is pinned as a shufflevector splat
// of the requested lane before the @llvm.arm.neon.vqrdmulh call.
// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqrdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqrdmulhq_lane_s32(a, b, 1);
}


// vqrdmulh[q]_n_*: the scalar %b is pinned as an insertelement splat chain
// feeding @llvm.arm.neon.vqrdmulh.
// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #4
// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqrdmulh_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #4
// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqrdmulh_n_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #4
// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqrdmulhq_n_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #4
// CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqrdmulhq_n_s32(a, b);
}


// vqrshl_*/vqrshlq_*: lowering pinned to @llvm.arm.neon.vqrshifts (signed)
// or @llvm.arm.neon.vqrshiftu (unsigned).
// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}


// vqrshrn_n_*: the immediate shift (1) is pinned as a constant splat
// vector argument to @llvm.arm.neon.vqrshiftns/vqrshiftnu.
// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}


// vqrshrun_n_*: lowering pinned to @llvm.arm.neon.vqrshiftnsu with the
// same constant-splat shift argument.
// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}


// vqshl_*: lowering pinned to @llvm.arm.neon.vqshifts (signed) or
// @llvm.arm.neon.vqshiftu (unsigned).
// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: define
<4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 12926// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12927// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12928// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12929// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12930// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4 12931// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> 12932// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16> 12933// CHECK: ret <4 x i16> [[TMP2]] 12934uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { 12935 return vqshl_u16(a, b); 12936} 12937 12938// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 12939// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12940// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12941// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12942// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12943// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4 12944// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> 12945// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32> 12946// CHECK: ret <2 x i32> [[TMP2]] 12947uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { 12948 return vqshl_u32(a, b); 12949} 12950 12951// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 12952// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 12953// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 12954// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 12955// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 12956// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> 
@llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4 12957// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> 12958// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64> 12959// CHECK: ret <1 x i64> [[TMP2]] 12960uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { 12961 return vqshl_u64(a, b); 12962} 12963 12964// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 12965// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 12966// CHECK: ret <16 x i8> [[VQSHLQ_V_I]] 12967int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { 12968 return vqshlq_s8(a, b); 12969} 12970 12971// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 12972// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12973// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 12974// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12975// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12976// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4 12977// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> 12978// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16> 12979// CHECK: ret <8 x i16> [[TMP2]] 12980int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { 12981 return vqshlq_s16(a, b); 12982} 12983 12984// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 12985// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12986// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 12987// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12988// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12989// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> 
[[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4 12990// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> 12991// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32> 12992// CHECK: ret <4 x i32> [[TMP2]] 12993int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { 12994 return vqshlq_s32(a, b); 12995} 12996 12997// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 12998// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 12999// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 13000// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13001// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 13002// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4 13003// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> 13004// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64> 13005// CHECK: ret <2 x i64> [[TMP2]] 13006int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { 13007 return vqshlq_s64(a, b); 13008} 13009 13010// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 13011// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 13012// CHECK: ret <16 x i8> [[VQSHLQ_V_I]] 13013uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { 13014 return vqshlq_u8(a, b); 13015} 13016 13017// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 13018// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13019// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 13020// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13021// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 13022// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> 
[[VQSHLQ_V1_I]]) #4 13023// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> 13024// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16> 13025// CHECK: ret <8 x i16> [[TMP2]] 13026uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { 13027 return vqshlq_u16(a, b); 13028} 13029 13030// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 13031// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13032// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 13033// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13034// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 13035// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4 13036// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> 13037// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32> 13038// CHECK: ret <4 x i32> [[TMP2]] 13039uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { 13040 return vqshlq_u32(a, b); 13041} 13042 13043// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 13044// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13045// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 13046// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13047// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 13048// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4 13049// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> 13050// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64> 13051// CHECK: ret <2 x i64> [[TMP2]] 13052uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { 13053 return vqshlq_u64(a, b); 13054} 13055 13056 13057// CHECK-LABEL: define <8 x i8> 
@test_vqshlu_n_s8(<8 x i8> %a) #0 { 13058// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 13059// CHECK: ret <8 x i8> [[VQSHLU_N]] 13060uint8x8_t test_vqshlu_n_s8(int8x8_t a) { 13061 return vqshlu_n_s8(a, 1); 13062} 13063 13064// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 { 13065// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13066// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 13067// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 13068// CHECK: ret <4 x i16> [[VQSHLU_N1]] 13069uint16x4_t test_vqshlu_n_s16(int16x4_t a) { 13070 return vqshlu_n_s16(a, 1); 13071} 13072 13073// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 { 13074// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13075// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13076// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>) 13077// CHECK: ret <2 x i32> [[VQSHLU_N1]] 13078uint32x2_t test_vqshlu_n_s32(int32x2_t a) { 13079 return vqshlu_n_s32(a, 1); 13080} 13081 13082// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 { 13083// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13084// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13085// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>) 13086// CHECK: ret <1 x i64> [[VQSHLU_N1]] 13087uint64x1_t test_vqshlu_n_s64(int64x1_t a) { 13088 return vqshlu_n_s64(a, 1); 13089} 13090 13091// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 { 13092// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 
1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 13093// CHECK: ret <16 x i8> [[VQSHLU_N]] 13094uint8x16_t test_vqshluq_n_s8(int8x16_t a) { 13095 return vqshluq_n_s8(a, 1); 13096} 13097 13098// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 { 13099// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13100// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13101// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) 13102// CHECK: ret <8 x i16> [[VQSHLU_N1]] 13103uint16x8_t test_vqshluq_n_s16(int16x8_t a) { 13104 return vqshluq_n_s16(a, 1); 13105} 13106 13107// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 { 13108// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13109// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13110// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>) 13111// CHECK: ret <4 x i32> [[VQSHLU_N1]] 13112uint32x4_t test_vqshluq_n_s32(int32x4_t a) { 13113 return vqshluq_n_s32(a, 1); 13114} 13115 13116// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 { 13117// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13118// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13119// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>) 13120// CHECK: ret <2 x i64> [[VQSHLU_N1]] 13121uint64x2_t test_vqshluq_n_s64(int64x2_t a) { 13122 return vqshluq_n_s64(a, 1); 13123} 13124 13125 13126// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 { 13127// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 13128// CHECK: ret <8 x i8> [[VQSHL_N]] 13129int8x8_t 
test_vqshl_n_s8(int8x8_t a) { 13130 return vqshl_n_s8(a, 1); 13131} 13132 13133// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 { 13134// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13135// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 13136// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 13137// CHECK: ret <4 x i16> [[VQSHL_N1]] 13138int16x4_t test_vqshl_n_s16(int16x4_t a) { 13139 return vqshl_n_s16(a, 1); 13140} 13141 13142// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 { 13143// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13144// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13145// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>) 13146// CHECK: ret <2 x i32> [[VQSHL_N1]] 13147int32x2_t test_vqshl_n_s32(int32x2_t a) { 13148 return vqshl_n_s32(a, 1); 13149} 13150 13151// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 { 13152// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13153// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13154// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>) 13155// CHECK: ret <1 x i64> [[VQSHL_N1]] 13156int64x1_t test_vqshl_n_s64(int64x1_t a) { 13157 return vqshl_n_s64(a, 1); 13158} 13159 13160// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 { 13161// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 13162// CHECK: ret <8 x i8> [[VQSHL_N]] 13163uint8x8_t test_vqshl_n_u8(uint8x8_t a) { 13164 return vqshl_n_u8(a, 1); 13165} 13166 13167// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 { 13168// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13169// CHECK: 
[[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 13170// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 13171// CHECK: ret <4 x i16> [[VQSHL_N1]] 13172uint16x4_t test_vqshl_n_u16(uint16x4_t a) { 13173 return vqshl_n_u16(a, 1); 13174} 13175 13176// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 { 13177// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13178// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13179// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>) 13180// CHECK: ret <2 x i32> [[VQSHL_N1]] 13181uint32x2_t test_vqshl_n_u32(uint32x2_t a) { 13182 return vqshl_n_u32(a, 1); 13183} 13184 13185// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 { 13186// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13187// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13188// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>) 13189// CHECK: ret <1 x i64> [[VQSHL_N1]] 13190uint64x1_t test_vqshl_n_u64(uint64x1_t a) { 13191 return vqshl_n_u64(a, 1); 13192} 13193 13194// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 { 13195// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 13196// CHECK: ret <16 x i8> [[VQSHL_N]] 13197int8x16_t test_vqshlq_n_s8(int8x16_t a) { 13198 return vqshlq_n_s8(a, 1); 13199} 13200 13201// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 { 13202// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13203// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13204// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], 
<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) 13205// CHECK: ret <8 x i16> [[VQSHL_N1]] 13206int16x8_t test_vqshlq_n_s16(int16x8_t a) { 13207 return vqshlq_n_s16(a, 1); 13208} 13209 13210// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 { 13211// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13212// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13213// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>) 13214// CHECK: ret <4 x i32> [[VQSHL_N1]] 13215int32x4_t test_vqshlq_n_s32(int32x4_t a) { 13216 return vqshlq_n_s32(a, 1); 13217} 13218 13219// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 { 13220// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13221// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13222// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>) 13223// CHECK: ret <2 x i64> [[VQSHL_N1]] 13224int64x2_t test_vqshlq_n_s64(int64x2_t a) { 13225 return vqshlq_n_s64(a, 1); 13226} 13227 13228// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 { 13229// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 13230// CHECK: ret <16 x i8> [[VQSHL_N]] 13231uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { 13232 return vqshlq_n_u8(a, 1); 13233} 13234 13235// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 { 13236// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13237// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13238// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) 13239// CHECK: ret <8 x i16> 
[[VQSHL_N1]] 13240uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { 13241 return vqshlq_n_u16(a, 1); 13242} 13243 13244// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 { 13245// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13246// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13247// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>) 13248// CHECK: ret <4 x i32> [[VQSHL_N1]] 13249uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { 13250 return vqshlq_n_u32(a, 1); 13251} 13252 13253// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 { 13254// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13255// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13256// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>) 13257// CHECK: ret <2 x i64> [[VQSHL_N1]] 13258uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { 13259 return vqshlq_n_u64(a, 1); 13260} 13261 13262 13263// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 { 13264// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13265// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13266// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 13267// CHECK: ret <8 x i8> [[VQSHRN_N1]] 13268int8x8_t test_vqshrn_n_s16(int16x8_t a) { 13269 return vqshrn_n_s16(a, 1); 13270} 13271 13272// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 { 13273// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13274// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13275// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 13276// CHECK: ret <4 
x i16> [[VQSHRN_N1]] 13277int16x4_t test_vqshrn_n_s32(int32x4_t a) { 13278 return vqshrn_n_s32(a, 1); 13279} 13280 13281// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 { 13282// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13283// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13284// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>) 13285// CHECK: ret <2 x i32> [[VQSHRN_N1]] 13286int32x2_t test_vqshrn_n_s64(int64x2_t a) { 13287 return vqshrn_n_s64(a, 1); 13288} 13289 13290// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 { 13291// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13292// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13293// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 13294// CHECK: ret <8 x i8> [[VQSHRN_N1]] 13295uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { 13296 return vqshrn_n_u16(a, 1); 13297} 13298 13299// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 { 13300// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13301// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13302// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 13303// CHECK: ret <4 x i16> [[VQSHRN_N1]] 13304uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { 13305 return vqshrn_n_u32(a, 1); 13306} 13307 13308// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 { 13309// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13310// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13311// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>) 13312// CHECK: ret <2 
x i32> [[VQSHRN_N1]] 13313uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { 13314 return vqshrn_n_u64(a, 1); 13315} 13316 13317 13318// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 { 13319// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13320// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13321// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 13322// CHECK: ret <8 x i8> [[VQSHRUN_N1]] 13323uint8x8_t test_vqshrun_n_s16(int16x8_t a) { 13324 return vqshrun_n_s16(a, 1); 13325} 13326 13327// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 { 13328// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13329// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13330// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 13331// CHECK: ret <4 x i16> [[VQSHRUN_N1]] 13332uint16x4_t test_vqshrun_n_s32(int32x4_t a) { 13333 return vqshrun_n_s32(a, 1); 13334} 13335 13336// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 { 13337// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13338// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13339// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>) 13340// CHECK: ret <2 x i32> [[VQSHRUN_N1]] 13341uint32x2_t test_vqshrun_n_s64(int64x2_t a) { 13342 return vqshrun_n_s64(a, 1); 13343} 13344 13345 13346// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { 13347// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 13348// CHECK: ret <8 x i8> [[VQSUB_V_I]] 13349int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { 13350 return vqsub_s8(a, b); 13351} 13352 
13353// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { 13354// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13355// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 13356// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 13357// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 13358// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4 13359// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> 13360// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16> 13361// CHECK: ret <4 x i16> [[TMP2]] 13362int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { 13363 return vqsub_s16(a, b); 13364} 13365 13366// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { 13367// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13368// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 13369// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13370// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 13371// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4 13372// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> 13373// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32> 13374// CHECK: ret <2 x i32> [[TMP2]] 13375int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { 13376 return vqsub_s32(a, b); 13377} 13378 13379// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 { 13380// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13381// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13382// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13383// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13384// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> 
@llvm.arm.neon.vqsubs.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4 13385// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> 13386// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64> 13387// CHECK: ret <1 x i64> [[TMP2]] 13388int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { 13389 return vqsub_s64(a, b); 13390} 13391 13392// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { 13393// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 13394// CHECK: ret <8 x i8> [[VQSUB_V_I]] 13395uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { 13396 return vqsub_u8(a, b); 13397} 13398 13399// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { 13400// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13401// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 13402// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 13403// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 13404// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4 13405// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> 13406// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16> 13407// CHECK: ret <4 x i16> [[TMP2]] 13408uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { 13409 return vqsub_u16(a, b); 13410} 13411 13412// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { 13413// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13414// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 13415// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13416// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 13417// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4 
13418// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> 13419// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32> 13420// CHECK: ret <2 x i32> [[TMP2]] 13421uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { 13422 return vqsub_u32(a, b); 13423} 13424 13425// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 { 13426// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13427// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13428// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13429// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13430// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4 13431// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> 13432// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64> 13433// CHECK: ret <1 x i64> [[TMP2]] 13434uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { 13435 return vqsub_u64(a, b); 13436} 13437 13438// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 13439// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 13440// CHECK: ret <16 x i8> [[VQSUBQ_V_I]] 13441int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { 13442 return vqsubq_s8(a, b); 13443} 13444 13445// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 13446// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13447// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 13448// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13449// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 13450// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 13451// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> 
[[VQSUBQ_V2_I]] to <16 x i8> 13452// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> 13453// CHECK: ret <8 x i16> [[TMP2]] 13454int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { 13455 return vqsubq_s16(a, b); 13456} 13457 13458// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 13459// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13460// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 13461// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13462// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 13463// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 13464// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> 13465// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> 13466// CHECK: ret <4 x i32> [[TMP2]] 13467int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { 13468 return vqsubq_s32(a, b); 13469} 13470 13471// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 13472// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13473// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 13474// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13475// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 13476// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 13477// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 13478// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> 13479// CHECK: ret <2 x i64> [[TMP2]] 13480int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { 13481 return vqsubq_s64(a, b); 13482} 13483 13484// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 13485// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> 
@llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 13486// CHECK: ret <16 x i8> [[VQSUBQ_V_I]] 13487uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { 13488 return vqsubq_u8(a, b); 13489} 13490 13491// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 13492// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13493// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 13494// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13495// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 13496// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 13497// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> 13498// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> 13499// CHECK: ret <8 x i16> [[TMP2]] 13500uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { 13501 return vqsubq_u16(a, b); 13502} 13503 13504// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 13505// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13506// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 13507// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13508// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 13509// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 13510// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> 13511// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> 13512// CHECK: ret <4 x i32> [[TMP2]] 13513uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { 13514 return vqsubq_u32(a, b); 13515} 13516 13517// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 13518// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13519// CHECK: 
[[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 13520// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13521// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 13522// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 13523// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 13524// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> 13525// CHECK: ret <2 x i64> [[TMP2]] 13526uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { 13527 return vqsubq_u64(a, b); 13528} 13529 13530 13531// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { 13532// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13533// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 13534// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13535// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 13536// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4 13537// CHECK: ret <8 x i8> [[VRADDHN_V2_I]] 13538int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { 13539 return vraddhn_s16(a, b); 13540} 13541 13542// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { 13543// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13544// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 13545// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13546// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 13547// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4 13548// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> 13549// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16> 13550// CHECK: ret 
<4 x i16> [[TMP2]] 13551int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { 13552 return vraddhn_s32(a, b); 13553} 13554 13555// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 13556// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13557// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 13558// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13559// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 13560// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4 13561// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> 13562// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32> 13563// CHECK: ret <2 x i32> [[TMP2]] 13564int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { 13565 return vraddhn_s64(a, b); 13566} 13567 13568// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { 13569// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13570// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 13571// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13572// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 13573// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4 13574// CHECK: ret <8 x i8> [[VRADDHN_V2_I]] 13575uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { 13576 return vraddhn_u16(a, b); 13577} 13578 13579// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 13580// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13581// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 13582// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13583// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 13584// CHECK: 
[[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4 13585// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> 13586// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16> 13587// CHECK: ret <4 x i16> [[TMP2]] 13588uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { 13589 return vraddhn_u32(a, b); 13590} 13591 13592// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 13593// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13594// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 13595// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13596// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 13597// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4 13598// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> 13599// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32> 13600// CHECK: ret <2 x i32> [[TMP2]] 13601uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { 13602 return vraddhn_u64(a, b); 13603} 13604 13605 13606// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 { 13607// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 13608// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 13609// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #4 13610// CHECK: ret <2 x float> [[VRECPE_V1_I]] 13611float32x2_t test_vrecpe_f32(float32x2_t a) { 13612 return vrecpe_f32(a); 13613} 13614 13615// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 { 13616// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13617// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13618// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> 
@llvm.arm.neon.vrecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #4 13619// CHECK: ret <2 x i32> [[VRECPE_V1_I]] 13620uint32x2_t test_vrecpe_u32(uint32x2_t a) { 13621 return vrecpe_u32(a); 13622} 13623 13624// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 { 13625// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 13626// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 13627// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #4 13628// CHECK: ret <4 x float> [[VRECPEQ_V1_I]] 13629float32x4_t test_vrecpeq_f32(float32x4_t a) { 13630 return vrecpeq_f32(a); 13631} 13632 13633// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 { 13634// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13635// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13636// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #4 13637// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]] 13638uint32x4_t test_vrecpeq_u32(uint32x4_t a) { 13639 return vrecpeq_u32(a); 13640} 13641 13642 13643// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %a, <2 x float> %b) #0 { 13644// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 13645// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 13646// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 13647// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 13648// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4 13649// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8> 13650// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float> 13651// CHECK: ret <2 x float> [[TMP2]] 13652float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) { 13653 return vrecps_f32(a, b); 13654} 13655 13656// CHECK-LABEL: 
define <4 x float> @test_vrecpsq_f32(<4 x float> %a, <4 x float> %b) #0 { 13657// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 13658// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 13659// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 13660// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 13661// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4 13662// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> 13663// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float> 13664// CHECK: ret <4 x float> [[TMP2]] 13665float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) { 13666 return vrecpsq_f32(a, b); 13667} 13668 13669 13670// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 { 13671// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13672// CHECK: ret <8 x i8> [[TMP0]] 13673int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { 13674 return vreinterpret_s8_s16(a); 13675} 13676 13677// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 { 13678// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13679// CHECK: ret <8 x i8> [[TMP0]] 13680int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { 13681 return vreinterpret_s8_s32(a); 13682} 13683 13684// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 { 13685// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13686// CHECK: ret <8 x i8> [[TMP0]] 13687int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { 13688 return vreinterpret_s8_s64(a); 13689} 13690 13691// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 { 13692// CHECK: ret <8 x i8> %a 13693int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { 13694 return vreinterpret_s8_u8(a); 13695} 13696 13697// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 { 13698// 
CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13699// CHECK: ret <8 x i8> [[TMP0]] 13700int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { 13701 return vreinterpret_s8_u16(a); 13702} 13703 13704// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 { 13705// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13706// CHECK: ret <8 x i8> [[TMP0]] 13707int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { 13708 return vreinterpret_s8_u32(a); 13709} 13710 13711// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 { 13712// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13713// CHECK: ret <8 x i8> [[TMP0]] 13714int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) { 13715 return vreinterpret_s8_u64(a); 13716} 13717 13718// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 { 13719// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 13720// CHECK: ret <8 x i8> [[TMP0]] 13721int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { 13722 return vreinterpret_s8_f16(a); 13723} 13724 13725// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 { 13726// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 13727// CHECK: ret <8 x i8> [[TMP0]] 13728int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { 13729 return vreinterpret_s8_f32(a); 13730} 13731 13732// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 { 13733// CHECK: ret <8 x i8> %a 13734int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { 13735 return vreinterpret_s8_p8(a); 13736} 13737 13738// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 { 13739// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13740// CHECK: ret <8 x i8> [[TMP0]] 13741int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { 13742 return vreinterpret_s8_p16(a); 13743} 13744 13745// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 { 13746// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 
13747// CHECK: ret <4 x i16> [[TMP0]] 13748int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { 13749 return vreinterpret_s16_s8(a); 13750} 13751 13752// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 { 13753// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 13754// CHECK: ret <4 x i16> [[TMP0]] 13755int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { 13756 return vreinterpret_s16_s32(a); 13757} 13758 13759// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 { 13760// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 13761// CHECK: ret <4 x i16> [[TMP0]] 13762int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { 13763 return vreinterpret_s16_s64(a); 13764} 13765 13766// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 { 13767// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 13768// CHECK: ret <4 x i16> [[TMP0]] 13769int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { 13770 return vreinterpret_s16_u8(a); 13771} 13772 13773// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 { 13774// CHECK: ret <4 x i16> %a 13775int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { 13776 return vreinterpret_s16_u16(a); 13777} 13778 13779// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 { 13780// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 13781// CHECK: ret <4 x i16> [[TMP0]] 13782int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { 13783 return vreinterpret_s16_u32(a); 13784} 13785 13786// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 { 13787// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 13788// CHECK: ret <4 x i16> [[TMP0]] 13789int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { 13790 return vreinterpret_s16_u64(a); 13791} 13792 13793// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 { 13794// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> 13795// CHECK: ret <4 
x i16> [[TMP0]] 13796int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { 13797 return vreinterpret_s16_f16(a); 13798} 13799 13800// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 { 13801// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> 13802// CHECK: ret <4 x i16> [[TMP0]] 13803int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { 13804 return vreinterpret_s16_f32(a); 13805} 13806 13807// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 { 13808// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 13809// CHECK: ret <4 x i16> [[TMP0]] 13810int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { 13811 return vreinterpret_s16_p8(a); 13812} 13813 13814// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 { 13815// CHECK: ret <4 x i16> %a 13816int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { 13817 return vreinterpret_s16_p16(a); 13818} 13819 13820// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 { 13821// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 13822// CHECK: ret <2 x i32> [[TMP0]] 13823int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { 13824 return vreinterpret_s32_s8(a); 13825} 13826 13827// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 { 13828// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 13829// CHECK: ret <2 x i32> [[TMP0]] 13830int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { 13831 return vreinterpret_s32_s16(a); 13832} 13833 13834// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 { 13835// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 13836// CHECK: ret <2 x i32> [[TMP0]] 13837int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { 13838 return vreinterpret_s32_s64(a); 13839} 13840 13841// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 { 13842// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 13843// CHECK: ret <2 x i32> [[TMP0]] 
// NOTE(review): auto-generated FileCheck test body (Clang NeonEmitter). Each
// vreinterpret_s32_* intrinsic is a pure type reinterpretation of a 64-bit
// NEON vector, so the expected IR is a single `bitcast` to <2 x i32> — or no
// instruction at all when source and destination share the same IR type
// (see the _u32 case below, which just returns %a). Do not edit the
// `// CHECK:` lines by hand; regenerate with the NEON emitter instead.
13844int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { 13845 return vreinterpret_s32_u8(a); 13846} 13847 13848// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 { 13849// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 13850// CHECK: ret <2 x i32> [[TMP0]] 13851int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { 13852 return vreinterpret_s32_u16(a); 13853} 13854 13855// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 { 13856// CHECK: ret <2 x i32> %a 13857int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { 13858 return vreinterpret_s32_u32(a); 13859} 13860 13861// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 { 13862// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 13863// CHECK: ret <2 x i32> [[TMP0]] 13864int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { 13865 return vreinterpret_s32_u64(a); 13866} 13867 13868// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 { 13869// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> 13870// CHECK: ret <2 x i32> [[TMP0]] 13871int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { 13872 return vreinterpret_s32_f16(a); 13873} 13874 13875// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 { 13876// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> 13877// CHECK: ret <2 x i32> [[TMP0]] 13878int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { 13879 return vreinterpret_s32_f32(a); 13880} 13881 13882// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 { 13883// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 13884// CHECK: ret <2 x i32> [[TMP0]] 13885int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { 13886 return vreinterpret_s32_p8(a); 13887} 13888 13889// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 { 13890// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 13891// CHECK: ret <2 x i32> [[TMP0]]
13892int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { 13893 return vreinterpret_s32_p16(a); 13894} 13895 13896// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 { 13897// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 13898// CHECK: ret <1 x i64> [[TMP0]] 13899int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { 13900 return vreinterpret_s64_s8(a); 13901} 13902 13903// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 { 13904// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 13905// CHECK: ret <1 x i64> [[TMP0]] 13906int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { 13907 return vreinterpret_s64_s16(a); 13908} 13909 13910// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 { 13911// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 13912// CHECK: ret <1 x i64> [[TMP0]] 13913int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { 13914 return vreinterpret_s64_s32(a); 13915} 13916 13917// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 { 13918// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 13919// CHECK: ret <1 x i64> [[TMP0]] 13920int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { 13921 return vreinterpret_s64_u8(a); 13922} 13923 13924// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 { 13925// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 13926// CHECK: ret <1 x i64> [[TMP0]] 13927int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { 13928 return vreinterpret_s64_u16(a); 13929} 13930 13931// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 { 13932// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 13933// CHECK: ret <1 x i64> [[TMP0]] 13934int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { 13935 return vreinterpret_s64_u32(a); 13936} 13937 13938// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 { 13939// CHECK: ret <1 x i64> %a 13940int64x1_t 
test_vreinterpret_s64_u64(uint64x1_t a) { 13941 return vreinterpret_s64_u64(a); 13942} 13943 13944// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 { 13945// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> 13946// CHECK: ret <1 x i64> [[TMP0]] 13947int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { 13948 return vreinterpret_s64_f16(a); 13949} 13950 13951// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 { 13952// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> 13953// CHECK: ret <1 x i64> [[TMP0]] 13954int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { 13955 return vreinterpret_s64_f32(a); 13956} 13957 13958// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 { 13959// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 13960// CHECK: ret <1 x i64> [[TMP0]] 13961int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { 13962 return vreinterpret_s64_p8(a); 13963} 13964 13965// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 { 13966// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 13967// CHECK: ret <1 x i64> [[TMP0]] 13968int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { 13969 return vreinterpret_s64_p16(a); 13970} 13971 13972// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 { 13973// CHECK: ret <8 x i8> %a 13974uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { 13975 return vreinterpret_u8_s8(a); 13976} 13977 13978// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 { 13979// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13980// CHECK: ret <8 x i8> [[TMP0]] 13981uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { 13982 return vreinterpret_u8_s16(a); 13983} 13984 13985// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 { 13986// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13987// CHECK: ret <8 x i8> [[TMP0]] 13988uint8x8_t 
test_vreinterpret_u8_s32(int32x2_t a) { 13989 return vreinterpret_u8_s32(a); 13990} 13991 13992// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 { 13993// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13994// CHECK: ret <8 x i8> [[TMP0]] 13995uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { 13996 return vreinterpret_u8_s64(a); 13997} 13998 13999// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 { 14000// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14001// CHECK: ret <8 x i8> [[TMP0]] 14002uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { 14003 return vreinterpret_u8_u16(a); 14004} 14005 14006// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 { 14007// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 14008// CHECK: ret <8 x i8> [[TMP0]] 14009uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { 14010 return vreinterpret_u8_u32(a); 14011} 14012 14013// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 { 14014// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 14015// CHECK: ret <8 x i8> [[TMP0]] 14016uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { 14017 return vreinterpret_u8_u64(a); 14018} 14019 14020// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 { 14021// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 14022// CHECK: ret <8 x i8> [[TMP0]] 14023uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { 14024 return vreinterpret_u8_f16(a); 14025} 14026 14027// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 { 14028// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 14029// CHECK: ret <8 x i8> [[TMP0]] 14030uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { 14031 return vreinterpret_u8_f32(a); 14032} 14033 14034// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 { 14035// CHECK: ret <8 x i8> %a 14036uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { 14037 
return vreinterpret_u8_p8(a); 14038} 14039 14040// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 { 14041// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14042// CHECK: ret <8 x i8> [[TMP0]] 14043uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { 14044 return vreinterpret_u8_p16(a); 14045} 14046 14047// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 { 14048// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14049// CHECK: ret <4 x i16> [[TMP0]] 14050uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { 14051 return vreinterpret_u16_s8(a); 14052} 14053 14054// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 { 14055// CHECK: ret <4 x i16> %a 14056uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { 14057 return vreinterpret_u16_s16(a); 14058} 14059 14060// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 { 14061// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 14062// CHECK: ret <4 x i16> [[TMP0]] 14063uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { 14064 return vreinterpret_u16_s32(a); 14065} 14066 14067// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 { 14068// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14069// CHECK: ret <4 x i16> [[TMP0]] 14070uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { 14071 return vreinterpret_u16_s64(a); 14072} 14073 14074// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 { 14075// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14076// CHECK: ret <4 x i16> [[TMP0]] 14077uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { 14078 return vreinterpret_u16_u8(a); 14079} 14080 14081// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 { 14082// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 14083// CHECK: ret <4 x i16> [[TMP0]] 14084uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { 14085 return 
vreinterpret_u16_u32(a); 14086} 14087 14088// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 { 14089// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14090// CHECK: ret <4 x i16> [[TMP0]] 14091uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) { 14092 return vreinterpret_u16_u64(a); 14093} 14094 14095// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 { 14096// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> 14097// CHECK: ret <4 x i16> [[TMP0]] 14098uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) { 14099 return vreinterpret_u16_f16(a); 14100} 14101 14102// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 { 14103// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> 14104// CHECK: ret <4 x i16> [[TMP0]] 14105uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) { 14106 return vreinterpret_u16_f32(a); 14107} 14108 14109// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 { 14110// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14111// CHECK: ret <4 x i16> [[TMP0]] 14112uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) { 14113 return vreinterpret_u16_p8(a); 14114} 14115 14116// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 { 14117// CHECK: ret <4 x i16> %a 14118uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) { 14119 return vreinterpret_u16_p16(a); 14120} 14121 14122// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 { 14123// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 14124// CHECK: ret <2 x i32> [[TMP0]] 14125uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) { 14126 return vreinterpret_u32_s8(a); 14127} 14128 14129// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 { 14130// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 14131// CHECK: ret <2 x i32> [[TMP0]] 14132uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) { 14133 return 
vreinterpret_u32_s16(a);
}

// Auto-generated NEON vreinterpret tests.  A reinterpret between two vector
// types with different element layouts must lower to exactly one IR bitcast;
// a reinterpret that only changes signedness (or int <-> poly) over the same
// element layout is a no-op that returns its argument unchanged.

// ---- vreinterpret_*: 64-bit (D-register) reinterpret casts ----

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// ---- vreinterpretq_*: 128-bit (Q-register) reinterpret casts ----

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL:
define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 { 15045// CHECK: ret <4 x i32> %a 15046uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) { 15047 return vreinterpretq_u32_s32(a); 15048} 15049 15050// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 { 15051// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 15052// CHECK: ret <4 x i32> [[TMP0]] 15053uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) { 15054 return vreinterpretq_u32_s64(a); 15055} 15056 15057// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 { 15058// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> 15059// CHECK: ret <4 x i32> [[TMP0]] 15060uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) { 15061 return vreinterpretq_u32_u8(a); 15062} 15063 15064// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 { 15065// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> 15066// CHECK: ret <4 x i32> [[TMP0]] 15067uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) { 15068 return vreinterpretq_u32_u16(a); 15069} 15070 15071// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 { 15072// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 15073// CHECK: ret <4 x i32> [[TMP0]] 15074uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) { 15075 return vreinterpretq_u32_u64(a); 15076} 15077 15078// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 { 15079// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> 15080// CHECK: ret <4 x i32> [[TMP0]] 15081uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) { 15082 return vreinterpretq_u32_f16(a); 15083} 15084 15085// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 { 15086// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> 15087// CHECK: ret <4 x i32> [[TMP0]] 15088uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) { 15089 return vreinterpretq_u32_f32(a); 15090} 15091 
// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// vreinterpretq_u64_*: reinterpret to <2 x i64>; a single IR bitcast, or a
// no-op for the s64 source (identical IR vector type).

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// vreinterpretq_f16_* / f32_* / p8_*: remaining 128-bit reinterpret casts to
// half/float/poly vectors; each is a single IR bitcast (or a no-op "ret %a"
// when the IR vector types already match).

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}


// vrev16_*: reverse the byte order inside each 16-bit unit; lowered to a
// shufflevector with constant indices (checked below).

// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
  return vrev16_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
  return vrev16_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
  return vrev16_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
  return vrev16q_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
  return vrev16q_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
  return vrev16q_p8(a);
}


// vrev32_*: reverse the elements inside each 32-bit unit (constant shuffle).

// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
  return vrev32_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
  return vrev32_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
  return vrev32_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
  return vrev32_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
  return vrev32_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
  return vrev32_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
  return vrev32q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
  return vrev32q_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
  return vrev32q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
  return vrev32q_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
  return vrev32q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
  return vrev32q_p16(a);
}


// vrev64_*: reverse the elements inside each 64-bit unit (constant shuffle).

// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
  return vrev64_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
  return vrev64_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
  return vrev64_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
  return vrev64_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
  return vrev64_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
  return vrev64_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
  return vrev64_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
  return vrev64_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
  return vrev64_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
  return vrev64q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
  return vrev64q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
  return vrev64q_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
  return vrev64q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
  return vrev64q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
  return vrev64q_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
  return vrev64q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
  return vrev64q_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
  return vrev64q_f32(a);
}


// vrhadd_*: rounding halving add via the llvm.arm.neon.vrhadds/vrhaddu
// intrinsics; the 16/32-bit variants round-trip their operands through
// <8 x i8> bitcasts, as checked below.

// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
  return vrhadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
  return vrhadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
  return vrhadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vrhadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] =
bitcast <8 x i8> [[TMP0]] to <4 x i16> 15786// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 15787// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4 15788// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> 15789// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16> 15790// CHECK: ret <4 x i16> [[TMP2]] 15791uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) { 15792 return vrhadd_u16(a, b); 15793} 15794 15795// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 15796// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 15797// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 15798// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 15799// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 15800// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4 15801// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> 15802// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32> 15803// CHECK: ret <2 x i32> [[TMP2]] 15804uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) { 15805 return vrhadd_u32(a, b); 15806} 15807 15808// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 15809// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 15810// CHECK: ret <16 x i8> [[VRHADDQ_V_I]] 15811int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) { 15812 return vrhaddq_s8(a, b); 15813} 15814 15815// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 15816// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 15817// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 15818// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x 
i16> 15819// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 15820// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4 15821// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> 15822// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16> 15823// CHECK: ret <8 x i16> [[TMP2]] 15824int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) { 15825 return vrhaddq_s16(a, b); 15826} 15827 15828// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 15829// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 15830// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 15831// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 15832// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 15833// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4 15834// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> 15835// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32> 15836// CHECK: ret <4 x i32> [[TMP2]] 15837int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) { 15838 return vrhaddq_s32(a, b); 15839} 15840 15841// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 15842// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 15843// CHECK: ret <16 x i8> [[VRHADDQ_V_I]] 15844uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) { 15845 return vrhaddq_u8(a, b); 15846} 15847 15848// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 15849// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 15850// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 15851// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 
15852// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 15853// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4 15854// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> 15855// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16> 15856// CHECK: ret <8 x i16> [[TMP2]] 15857uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) { 15858 return vrhaddq_u16(a, b); 15859} 15860 15861// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 15862// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 15863// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 15864// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 15865// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 15866// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4 15867// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> 15868// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32> 15869// CHECK: ret <4 x i32> [[TMP2]] 15870uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) { 15871 return vrhaddq_u32(a, b); 15872} 15873 15874 15875// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 15876// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 15877// CHECK: ret <8 x i8> [[VRSHL_V_I]] 15878int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { 15879 return vrshl_s8(a, b); 15880} 15881 15882// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 15883// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 15884// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 15885// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 15886// CHECK: 
[[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 15887// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4 15888// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> 15889// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16> 15890// CHECK: ret <4 x i16> [[TMP2]] 15891int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { 15892 return vrshl_s16(a, b); 15893} 15894 15895// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 15896// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 15897// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 15898// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 15899// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 15900// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4 15901// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> 15902// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32> 15903// CHECK: ret <2 x i32> [[TMP2]] 15904int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { 15905 return vrshl_s32(a, b); 15906} 15907 15908// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { 15909// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 15910// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 15911// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 15912// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 15913// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4 15914// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> 15915// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64> 15916// CHECK: ret <1 x i64> [[TMP2]] 15917int64x1_t 
test_vrshl_s64(int64x1_t a, int64x1_t b) { 15918 return vrshl_s64(a, b); 15919} 15920 15921// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 15922// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 15923// CHECK: ret <8 x i8> [[VRSHL_V_I]] 15924uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { 15925 return vrshl_u8(a, b); 15926} 15927 15928// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 15929// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 15930// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 15931// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 15932// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 15933// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4 15934// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> 15935// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16> 15936// CHECK: ret <4 x i16> [[TMP2]] 15937uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { 15938 return vrshl_u16(a, b); 15939} 15940 15941// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 15942// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 15943// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 15944// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 15945// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 15946// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4 15947// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> 15948// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32> 15949// CHECK: ret <2 x i32> [[TMP2]] 15950uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { 15951 return vrshl_u32(a, b); 15952} 
15953 15954// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 15955// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 15956// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 15957// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 15958// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 15959// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4 15960// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> 15961// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64> 15962// CHECK: ret <1 x i64> [[TMP2]] 15963uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { 15964 return vrshl_u64(a, b); 15965} 15966 15967// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 15968// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 15969// CHECK: ret <16 x i8> [[VRSHLQ_V_I]] 15970int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { 15971 return vrshlq_s8(a, b); 15972} 15973 15974// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 15975// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 15976// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 15977// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 15978// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 15979// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4 15980// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> 15981// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16> 15982// CHECK: ret <8 x i16> [[TMP2]] 15983int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { 15984 return vrshlq_s16(a, b); 15985} 15986 15987// CHECK-LABEL: define <4 x i32> 
@test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 15988// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 15989// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 15990// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 15991// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 15992// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4 15993// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> 15994// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32> 15995// CHECK: ret <4 x i32> [[TMP2]] 15996int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { 15997 return vrshlq_s32(a, b); 15998} 15999 16000// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 16001// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16002// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16003// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16004// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16005// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4 16006// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> 16007// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64> 16008// CHECK: ret <2 x i64> [[TMP2]] 16009int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { 16010 return vrshlq_s64(a, b); 16011} 16012 16013// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 16014// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 16015// CHECK: ret <16 x i8> [[VRSHLQ_V_I]] 16016uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { 16017 return vrshlq_u8(a, b); 16018} 16019 16020// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x 
i16> %b) #0 { 16021// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16022// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16023// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16024// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 16025// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4 16026// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> 16027// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16> 16028// CHECK: ret <8 x i16> [[TMP2]] 16029uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { 16030 return vrshlq_u16(a, b); 16031} 16032 16033// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 16034// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16035// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16036// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16037// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 16038// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4 16039// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> 16040// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32> 16041// CHECK: ret <4 x i32> [[TMP2]] 16042uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { 16043 return vrshlq_u32(a, b); 16044} 16045 16046// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 16047// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16048// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16049// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16050// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16051// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> 
@llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4 16052// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> 16053// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64> 16054// CHECK: ret <2 x i64> [[TMP2]] 16055uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { 16056 return vrshlq_u64(a, b); 16057} 16058 16059 16060// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 { 16061// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16062// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16063// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 16064// CHECK: ret <8 x i8> [[VRSHRN_N1]] 16065int8x8_t test_vrshrn_n_s16(int16x8_t a) { 16066 return vrshrn_n_s16(a, 1); 16067} 16068 16069// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 { 16070// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16071// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16072// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 16073// CHECK: ret <4 x i16> [[VRSHRN_N1]] 16074int16x4_t test_vrshrn_n_s32(int32x4_t a) { 16075 return vrshrn_n_s32(a, 1); 16076} 16077 16078// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 { 16079// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16080// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16081// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>) 16082// CHECK: ret <2 x i32> [[VRSHRN_N1]] 16083int32x2_t test_vrshrn_n_s64(int64x2_t a) { 16084 return vrshrn_n_s64(a, 1); 16085} 16086 16087// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 { 16088// CHECK: 
[[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16089// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16090// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 16091// CHECK: ret <8 x i8> [[VRSHRN_N1]] 16092uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { 16093 return vrshrn_n_u16(a, 1); 16094} 16095 16096// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 { 16097// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16098// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16099// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 16100// CHECK: ret <4 x i16> [[VRSHRN_N1]] 16101uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { 16102 return vrshrn_n_u32(a, 1); 16103} 16104 16105// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 { 16106// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16107// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16108// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>) 16109// CHECK: ret <2 x i32> [[VRSHRN_N1]] 16110uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { 16111 return vrshrn_n_u64(a, 1); 16112} 16113 16114 16115// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 { 16116// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16117// CHECK: ret <8 x i8> [[VRSHR_N]] 16118int8x8_t test_vrshr_n_s8(int8x8_t a) { 16119 return vrshr_n_s8(a, 1); 16120} 16121 16122// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 { 16123// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 16124// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 
16125// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 16126// CHECK: ret <4 x i16> [[VRSHR_N1]] 16127int16x4_t test_vrshr_n_s16(int16x4_t a) { 16128 return vrshr_n_s16(a, 1); 16129} 16130 16131// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 { 16132// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 16133// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16134// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>) 16135// CHECK: ret <2 x i32> [[VRSHR_N1]] 16136int32x2_t test_vrshr_n_s32(int32x2_t a) { 16137 return vrshr_n_s32(a, 1); 16138} 16139 16140// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 { 16141// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 16142// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16143// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) 16144// CHECK: ret <1 x i64> [[VRSHR_N1]] 16145int64x1_t test_vrshr_n_s64(int64x1_t a) { 16146 return vrshr_n_s64(a, 1); 16147} 16148 16149// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 { 16150// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16151// CHECK: ret <8 x i8> [[VRSHR_N]] 16152uint8x8_t test_vrshr_n_u8(uint8x8_t a) { 16153 return vrshr_n_u8(a, 1); 16154} 16155 16156// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 { 16157// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 16158// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16159// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 16160// CHECK: ret <4 x i16> [[VRSHR_N1]] 16161uint16x4_t 
test_vrshr_n_u16(uint16x4_t a) { 16162 return vrshr_n_u16(a, 1); 16163} 16164 16165// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 { 16166// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 16167// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16168// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>) 16169// CHECK: ret <2 x i32> [[VRSHR_N1]] 16170uint32x2_t test_vrshr_n_u32(uint32x2_t a) { 16171 return vrshr_n_u32(a, 1); 16172} 16173 16174// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 { 16175// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 16176// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16177// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) 16178// CHECK: ret <1 x i64> [[VRSHR_N1]] 16179uint64x1_t test_vrshr_n_u64(uint64x1_t a) { 16180 return vrshr_n_u64(a, 1); 16181} 16182 16183// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 { 16184// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16185// CHECK: ret <16 x i8> [[VRSHR_N]] 16186int8x16_t test_vrshrq_n_s8(int8x16_t a) { 16187 return vrshrq_n_s8(a, 1); 16188} 16189 16190// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 { 16191// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16192// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16193// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 16194// CHECK: ret <8 x i16> [[VRSHR_N1]] 16195int16x8_t test_vrshrq_n_s16(int16x8_t a) { 16196 return vrshrq_n_s16(a, 1); 16197} 16198 16199// CHECK-LABEL: 
define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 { 16200// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16201// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16202// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 16203// CHECK: ret <4 x i32> [[VRSHR_N1]] 16204int32x4_t test_vrshrq_n_s32(int32x4_t a) { 16205 return vrshrq_n_s32(a, 1); 16206} 16207 16208// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 { 16209// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16210// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16211// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>) 16212// CHECK: ret <2 x i64> [[VRSHR_N1]] 16213int64x2_t test_vrshrq_n_s64(int64x2_t a) { 16214 return vrshrq_n_s64(a, 1); 16215} 16216 16217// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 { 16218// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16219// CHECK: ret <16 x i8> [[VRSHR_N]] 16220uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { 16221 return vrshrq_n_u8(a, 1); 16222} 16223 16224// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 { 16225// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16226// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16227// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 16228// CHECK: ret <8 x i16> [[VRSHR_N1]] 16229uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { 16230 return vrshrq_n_u16(a, 1); 16231} 16232 16233// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 { 16234// CHECK: 
[[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16235// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16236// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 16237// CHECK: ret <4 x i32> [[VRSHR_N1]] 16238uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { 16239 return vrshrq_n_u32(a, 1); 16240} 16241 16242// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 { 16243// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16244// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16245// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>) 16246// CHECK: ret <2 x i64> [[VRSHR_N1]] 16247uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { 16248 return vrshrq_n_u64(a, 1); 16249} 16250 16251 16252// CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 { 16253// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 16254// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 16255// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]]) #4 16256// CHECK: ret <2 x float> [[VRSQRTE_V1_I]] 16257float32x2_t test_vrsqrte_f32(float32x2_t a) { 16258 return vrsqrte_f32(a); 16259} 16260 16261// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 { 16262// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 16263// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16264// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4 16265// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]] 16266uint32x2_t test_vrsqrte_u32(uint32x2_t a) { 16267 return vrsqrte_u32(a); 16268} 16269 16270// CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 { 16271// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 16272// CHECK: 
[[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 16273// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]]) #4 16274// CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]] 16275float32x4_t test_vrsqrteq_f32(float32x4_t a) { 16276 return vrsqrteq_f32(a); 16277} 16278 16279// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 { 16280// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16281// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16282// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4 16283// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]] 16284uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { 16285 return vrsqrteq_u32(a); 16286} 16287 16288 16289// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %a, <2 x float> %b) #0 { 16290// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 16291// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 16292// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 16293// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 16294// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4 16295// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> 16296// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float> 16297// CHECK: ret <2 x float> [[TMP2]] 16298float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) { 16299 return vrsqrts_f32(a, b); 16300} 16301 16302// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %a, <4 x float> %b) #0 { 16303// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 16304// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 16305// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 16306// CHECK: [[VRSQRTSQ_V1_I:%.*]] = 
bitcast <16 x i8> [[TMP1]] to <4 x float> 16307// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4 16308// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> 16309// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float> 16310// CHECK: ret <4 x float> [[TMP2]] 16311float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) { 16312 return vrsqrtsq_f32(a, b); 16313} 16314 16315 16316// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { 16317// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16318// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] 16319// CHECK: ret <8 x i8> [[VRSRA_N]] 16320int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { 16321 return vrsra_n_s8(a, b, 1); 16322} 16323 16324// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { 16325// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 16326// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 16327// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16328// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 16329// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 16330// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]] 16331// CHECK: ret <4 x i16> [[VRSRA_N]] 16332int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { 16333 return vrsra_n_s16(a, b, 1); 16334} 16335 16336// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { 16337// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 16338// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 16339// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16340// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> 
[[TMP1]] to <2 x i32> 16341// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>) 16342// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]] 16343// CHECK: ret <2 x i32> [[VRSRA_N]] 16344int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { 16345 return vrsra_n_s32(a, b, 1); 16346} 16347 16348// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { 16349// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 16350// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 16351// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16352// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 16353// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>) 16354// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]] 16355// CHECK: ret <1 x i64> [[VRSRA_N]] 16356int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { 16357 return vrsra_n_s64(a, b, 1); 16358} 16359 16360// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { 16361// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16362// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] 16363// CHECK: ret <8 x i8> [[VRSRA_N]] 16364uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { 16365 return vrsra_n_u8(a, b, 1); 16366} 16367 16368// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { 16369// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 16370// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 16371// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16372// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 16373// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 16374// CHECK: 
[[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]] 16375// CHECK: ret <4 x i16> [[VRSRA_N]] 16376uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { 16377 return vrsra_n_u16(a, b, 1); 16378} 16379 16380// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { 16381// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 16382// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 16383// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16384// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 16385// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>) 16386// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]] 16387// CHECK: ret <2 x i32> [[VRSRA_N]] 16388uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { 16389 return vrsra_n_u32(a, b, 1); 16390} 16391 16392// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { 16393// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 16394// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 16395// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16396// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 16397// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>) 16398// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]] 16399// CHECK: ret <1 x i64> [[VRSRA_N]] 16400uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { 16401 return vrsra_n_u64(a, b, 1); 16402} 16403 16404// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { 16405// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16406// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] 16407// CHECK: ret <16 x i8> [[VRSRA_N]] 
16408int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { 16409 return vrsraq_n_s8(a, b, 1); 16410} 16411 16412// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { 16413// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16414// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16415// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16416// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 16417// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 16418// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]] 16419// CHECK: ret <8 x i16> [[VRSRA_N]] 16420int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { 16421 return vrsraq_n_s16(a, b, 1); 16422} 16423 16424// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { 16425// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16426// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16427// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16428// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 16429// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 16430// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]] 16431// CHECK: ret <4 x i32> [[VRSRA_N]] 16432int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { 16433 return vrsraq_n_s32(a, b, 1); 16434} 16435 16436// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { 16437// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16438// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16439// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16440// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16441// CHECK: [[TMP4:%.*]] = call <2 x i64> 
@llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>) 16442// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] 16443// CHECK: ret <2 x i64> [[VRSRA_N]] 16444int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { 16445 return vrsraq_n_s64(a, b, 1); 16446} 16447 16448// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { 16449// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 16450// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] 16451// CHECK: ret <16 x i8> [[VRSRA_N]] 16452uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { 16453 return vrsraq_n_u8(a, b, 1); 16454} 16455 16456// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { 16457// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16458// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16459// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16460// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 16461// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 16462// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]] 16463// CHECK: ret <8 x i16> [[VRSRA_N]] 16464uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { 16465 return vrsraq_n_u16(a, b, 1); 16466} 16467 16468// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { 16469// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16470// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16471// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16472// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 16473// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x 
i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 16474// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]] 16475// CHECK: ret <4 x i32> [[VRSRA_N]] 16476uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { 16477 return vrsraq_n_u32(a, b, 1); 16478} 16479 16480// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { 16481// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16482// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16483// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16484// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16485// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>) 16486// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] 16487// CHECK: ret <2 x i64> [[VRSRA_N]] 16488uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { 16489 return vrsraq_n_u64(a, b, 1); 16490} 16491 16492 16493// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { 16494// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16495// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16496// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16497// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 16498// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4 16499// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] 16500int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { 16501 return vrsubhn_s16(a, b); 16502} 16503 16504// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { 16505// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16506// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16507// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16508// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> 
[[TMP1]] to <4 x i32> 16509// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4 16510// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> 16511// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16> 16512// CHECK: ret <4 x i16> [[TMP2]] 16513int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { 16514 return vrsubhn_s32(a, b); 16515} 16516 16517// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 16518// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16519// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16520// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16521// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16522// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4 16523// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> 16524// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32> 16525// CHECK: ret <2 x i32> [[TMP2]] 16526int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { 16527 return vrsubhn_s64(a, b); 16528} 16529 16530// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { 16531// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16532// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16533// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16534// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 16535// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4 16536// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] 16537uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { 16538 return vrsubhn_u16(a, b); 16539} 16540 16541// CHECK-LABEL: define <4 x i16> 
@test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 16542// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16543// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16544// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16545// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 16546// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4 16547// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> 16548// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16> 16549// CHECK: ret <4 x i16> [[TMP2]] 16550uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { 16551 return vrsubhn_u32(a, b); 16552} 16553 16554// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 16555// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16556// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16557// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16558// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16559// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4 16560// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> 16561// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32> 16562// CHECK: ret <2 x i32> [[TMP2]] 16563uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { 16564 return vrsubhn_u64(a, b); 16565} 16566 16567 16568// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 zeroext %a, <8 x i8> %b) #0 { 16569// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7 16570// CHECK: ret <8 x i8> [[VSET_LANE]] 16571uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) { 16572 return vset_lane_u8(a, b, 7); 16573} 16574 16575// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 zeroext %a, <4 
x i16> %b) #0 { 16576// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 16577// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16578// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 16579// CHECK: ret <4 x i16> [[VSET_LANE]] 16580uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) { 16581 return vset_lane_u16(a, b, 3); 16582} 16583 16584// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 { 16585// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 16586// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16587// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 16588// CHECK: ret <2 x i32> [[VSET_LANE]] 16589uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) { 16590 return vset_lane_u32(a, b, 1); 16591} 16592 16593// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 signext %a, <8 x i8> %b) #0 { 16594// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7 16595// CHECK: ret <8 x i8> [[VSET_LANE]] 16596int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) { 16597 return vset_lane_s8(a, b, 7); 16598} 16599 16600// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 signext %a, <4 x i16> %b) #0 { 16601// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 16602// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16603// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 16604// CHECK: ret <4 x i16> [[VSET_LANE]] 16605int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) { 16606 return vset_lane_s16(a, b, 3); 16607} 16608 16609// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 { 16610// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 16611// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16612// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 16613// CHECK: ret <2 x i32> [[VSET_LANE]] 16614int32x2_t test_vset_lane_s32(int32_t 
a, int32x2_t b) { 16615 return vset_lane_s32(a, b, 1); 16616} 16617 16618// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 signext %a, <8 x i8> %b) #0 { 16619// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7 16620// CHECK: ret <8 x i8> [[VSET_LANE]] 16621poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) { 16622 return vset_lane_p8(a, b, 7); 16623} 16624 16625// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 signext %a, <4 x i16> %b) #0 { 16626// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 16627// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16628// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 16629// CHECK: ret <4 x i16> [[VSET_LANE]] 16630poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) { 16631 return vset_lane_p16(a, b, 3); 16632} 16633 16634// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 { 16635// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8> 16636// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 16637// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1 16638// CHECK: ret <2 x float> [[VSET_LANE]] 16639float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { 16640 return vset_lane_f32(a, b, 1); 16641} 16642 16643// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 { 16644// CHECK: [[__REINT_246:%.*]] = alloca half, align 2 16645// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8 16646// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8 16647// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 16648// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2 16649// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8 16650// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16* 16651// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 16652// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to 
<4 x i16>* 16653// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8 16654// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 16655// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 16656// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 1 16657// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8 16658// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>* 16659// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8 16660// CHECK: ret <4 x half> [[TMP8]] 16661float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) { 16662 return vset_lane_f16(*a, b, 1); 16663} 16664 16665// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 zeroext %a, <16 x i8> %b) #0 { 16666// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15 16667// CHECK: ret <16 x i8> [[VSET_LANE]] 16668uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) { 16669 return vsetq_lane_u8(a, b, 15); 16670} 16671 16672// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 zeroext %a, <8 x i16> %b) #0 { 16673// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16674// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16675// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 16676// CHECK: ret <8 x i16> [[VSET_LANE]] 16677uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { 16678 return vsetq_lane_u16(a, b, 7); 16679} 16680 16681// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 { 16682// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16683// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16684// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 16685// CHECK: ret <4 x i32> [[VSET_LANE]] 16686uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { 16687 return vsetq_lane_u32(a, b, 3); 16688} 16689 16690// CHECK-LABEL: define <16 x 
i8> @test_vsetq_lane_s8(i8 signext %a, <16 x i8> %b) #0 { 16691// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15 16692// CHECK: ret <16 x i8> [[VSET_LANE]] 16693int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) { 16694 return vsetq_lane_s8(a, b, 15); 16695} 16696 16697// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 signext %a, <8 x i16> %b) #0 { 16698// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16699// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16700// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 16701// CHECK: ret <8 x i16> [[VSET_LANE]] 16702int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) { 16703 return vsetq_lane_s16(a, b, 7); 16704} 16705 16706// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 { 16707// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16708// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16709// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 16710// CHECK: ret <4 x i32> [[VSET_LANE]] 16711int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) { 16712 return vsetq_lane_s32(a, b, 3); 16713} 16714 16715// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 signext %a, <16 x i8> %b) #0 { 16716// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15 16717// CHECK: ret <16 x i8> [[VSET_LANE]] 16718poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) { 16719 return vsetq_lane_p8(a, b, 15); 16720} 16721 16722// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 signext %a, <8 x i16> %b) #0 { 16723// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16724// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16725// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 16726// CHECK: ret <8 x i16> [[VSET_LANE]] 16727poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) { 16728 return vsetq_lane_p16(a, b, 7); 16729} 
16730 16731// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 { 16732// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> 16733// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 16734// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3 16735// CHECK: ret <4 x float> [[VSET_LANE]] 16736float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { 16737 return vsetq_lane_f32(a, b, 3); 16738} 16739 16740// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 { 16741// CHECK: [[__REINT_248:%.*]] = alloca half, align 2 16742// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16 16743// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16 16744// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 16745// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2 16746// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16 16747// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16* 16748// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 16749// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>* 16750// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16 16751// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 16752// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 16753// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3 16754// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16 16755// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>* 16756// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16 16757// CHECK: ret <8 x half> [[TMP8]] 16758float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) { 16759 return vsetq_lane_f16(*a, b, 3); 16760} 16761 16762// The optimizer is able to get rid of all moves now. 
16763// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 { 16764// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> 16765// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16766// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 16767// CHECK: ret <1 x i64> [[VSET_LANE]] 16768int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) { 16769 return vset_lane_s64(a, b, 0); 16770} 16771 16772// The optimizer is able to get rid of all moves now. 16773// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 { 16774// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> 16775// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16776// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 16777// CHECK: ret <1 x i64> [[VSET_LANE]] 16778uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) { 16779 return vset_lane_u64(a, b, 0); 16780} 16781 16782// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 { 16783// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16784// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16785// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 16786// CHECK: ret <2 x i64> [[VSET_LANE]] 16787int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { 16788 return vsetq_lane_s64(a, b, 1); 16789} 16790 16791// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 { 16792// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16793// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16794// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 16795// CHECK: ret <2 x i64> [[VSET_LANE]] 16796uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) { 16797 return vsetq_lane_u64(a, b, 1); 16798} 16799 16800 16801// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 16802// CHECK: 
[[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 16803// CHECK: ret <8 x i8> [[VSHL_V_I]] 16804int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { 16805 return vshl_s8(a, b); 16806} 16807 16808// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 16809// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 16810// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 16811// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16812// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 16813// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 16814// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> 16815// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> 16816// CHECK: ret <4 x i16> [[TMP2]] 16817int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { 16818 return vshl_s16(a, b); 16819} 16820 16821// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 16822// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 16823// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 16824// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16825// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 16826// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 16827// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> 16828// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> 16829// CHECK: ret <2 x i32> [[TMP2]] 16830int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { 16831 return vshl_s32(a, b); 16832} 16833 16834// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { 16835// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 16836// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 
16837// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16838// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 16839// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 16840// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> 16841// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> 16842// CHECK: ret <1 x i64> [[TMP2]] 16843int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { 16844 return vshl_s64(a, b); 16845} 16846 16847// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 16848// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 16849// CHECK: ret <8 x i8> [[VSHL_V_I]] 16850uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { 16851 return vshl_u8(a, b); 16852} 16853 16854// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 16855// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 16856// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 16857// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16858// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 16859// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 16860// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> 16861// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> 16862// CHECK: ret <4 x i16> [[TMP2]] 16863uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { 16864 return vshl_u16(a, b); 16865} 16866 16867// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 16868// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 16869// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 16870// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 16871// CHECK: [[VSHL_V1_I:%.*]] = 
bitcast <8 x i8> [[TMP1]] to <2 x i32> 16872// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 16873// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> 16874// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> 16875// CHECK: ret <2 x i32> [[TMP2]] 16876uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { 16877 return vshl_u32(a, b); 16878} 16879 16880// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 16881// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 16882// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 16883// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 16884// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 16885// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 16886// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> 16887// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> 16888// CHECK: ret <1 x i64> [[TMP2]] 16889uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { 16890 return vshl_u64(a, b); 16891} 16892 16893// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 16894// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 16895// CHECK: ret <16 x i8> [[VSHLQ_V_I]] 16896int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { 16897 return vshlq_s8(a, b); 16898} 16899 16900// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 16901// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16902// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16903// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16904// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 16905// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> 
@llvm.arm.neon.vshifts.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4 16906// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> 16907// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16> 16908// CHECK: ret <8 x i16> [[TMP2]] 16909int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { 16910 return vshlq_s16(a, b); 16911} 16912 16913// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 16914// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16915// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16916// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16917// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 16918// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4 16919// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> 16920// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32> 16921// CHECK: ret <4 x i32> [[TMP2]] 16922int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { 16923 return vshlq_s32(a, b); 16924} 16925 16926// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 16927// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16928// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16929// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16930// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16931// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4 16932// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> 16933// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64> 16934// CHECK: ret <2 x i64> [[TMP2]] 16935int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { 16936 return vshlq_s64(a, b); 16937} 16938 16939// CHECK-LABEL: 
define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 16940// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 16941// CHECK: ret <16 x i8> [[VSHLQ_V_I]] 16942uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { 16943 return vshlq_u8(a, b); 16944} 16945 16946// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 16947// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 16948// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 16949// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 16950// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 16951// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4 16952// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> 16953// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16> 16954// CHECK: ret <8 x i16> [[TMP2]] 16955uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { 16956 return vshlq_u16(a, b); 16957} 16958 16959// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 16960// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 16961// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 16962// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 16963// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 16964// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4 16965// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> 16966// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32> 16967// CHECK: ret <4 x i32> [[TMP2]] 16968uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { 16969 return vshlq_u32(a, b); 16970} 16971 16972// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) 
#0 { 16973// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 16974// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 16975// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 16976// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 16977// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4 16978// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> 16979// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64> 16980// CHECK: ret <2 x i64> [[TMP2]] 16981uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { 16982 return vshlq_u64(a, b); 16983} 16984 16985 16986// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { 16987// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> 16988// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 16989// CHECK: ret <8 x i16> [[VSHLL_N]] 16990int16x8_t test_vshll_n_s8(int8x8_t a) { 16991 return vshll_n_s8(a, 1); 16992} 16993 16994// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { 16995// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 16996// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 16997// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 16998// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1> 16999// CHECK: ret <4 x i32> [[VSHLL_N]] 17000int32x4_t test_vshll_n_s16(int16x4_t a) { 17001 return vshll_n_s16(a, 1); 17002} 17003 17004// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { 17005// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17006// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17007// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 17008// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1> 17009// CHECK: ret <2 x i64> [[VSHLL_N]] 17010int64x2_t 
test_vshll_n_s32(int32x2_t a) { 17011 return vshll_n_s32(a, 1); 17012} 17013 17014// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { 17015// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> 17016// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 17017// CHECK: ret <8 x i16> [[VSHLL_N]] 17018uint16x8_t test_vshll_n_u8(uint8x8_t a) { 17019 return vshll_n_u8(a, 1); 17020} 17021 17022// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { 17023// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17024// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17025// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 17026// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1> 17027// CHECK: ret <4 x i32> [[VSHLL_N]] 17028uint32x4_t test_vshll_n_u16(uint16x4_t a) { 17029 return vshll_n_u16(a, 1); 17030} 17031 17032// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { 17033// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17034// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17035// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 17036// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1> 17037// CHECK: ret <2 x i64> [[VSHLL_N]] 17038uint64x2_t test_vshll_n_u32(uint32x2_t a) { 17039 return vshll_n_u32(a, 1); 17040} 17041 17042 17043// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 { 17044// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17045// CHECK: ret <8 x i8> [[VSHL_N]] 17046int8x8_t test_vshl_n_s8(int8x8_t a) { 17047 return vshl_n_s8(a, 1); 17048} 17049 17050// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 { 17051// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17052// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17053// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 
1, i16 1, i16 1, i16 1> 17054// CHECK: ret <4 x i16> [[VSHL_N]] 17055int16x4_t test_vshl_n_s16(int16x4_t a) { 17056 return vshl_n_s16(a, 1); 17057} 17058 17059// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 { 17060// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17061// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17062// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1> 17063// CHECK: ret <2 x i32> [[VSHL_N]] 17064int32x2_t test_vshl_n_s32(int32x2_t a) { 17065 return vshl_n_s32(a, 1); 17066} 17067 17068// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 { 17069// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 17070// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 17071// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1> 17072// CHECK: ret <1 x i64> [[VSHL_N]] 17073int64x1_t test_vshl_n_s64(int64x1_t a) { 17074 return vshl_n_s64(a, 1); 17075} 17076 17077// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 { 17078// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17079// CHECK: ret <8 x i8> [[VSHL_N]] 17080uint8x8_t test_vshl_n_u8(uint8x8_t a) { 17081 return vshl_n_u8(a, 1); 17082} 17083 17084// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 { 17085// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17086// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17087// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1> 17088// CHECK: ret <4 x i16> [[VSHL_N]] 17089uint16x4_t test_vshl_n_u16(uint16x4_t a) { 17090 return vshl_n_u16(a, 1); 17091} 17092 17093// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 { 17094// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17095// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17096// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1> 17097// CHECK: ret <2 x i32> [[VSHL_N]] 
17098uint32x2_t test_vshl_n_u32(uint32x2_t a) { 17099 return vshl_n_u32(a, 1); 17100} 17101 17102// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 { 17103// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 17104// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 17105// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1> 17106// CHECK: ret <1 x i64> [[VSHL_N]] 17107uint64x1_t test_vshl_n_u64(uint64x1_t a) { 17108 return vshl_n_u64(a, 1); 17109} 17110 17111// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 { 17112// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17113// CHECK: ret <16 x i8> [[VSHL_N]] 17114int8x16_t test_vshlq_n_s8(int8x16_t a) { 17115 return vshlq_n_s8(a, 1); 17116} 17117 17118// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 { 17119// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 17120// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 17121// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 17122// CHECK: ret <8 x i16> [[VSHL_N]] 17123int16x8_t test_vshlq_n_s16(int16x8_t a) { 17124 return vshlq_n_s16(a, 1); 17125} 17126 17127// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 { 17128// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 17129// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 17130// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1> 17131// CHECK: ret <4 x i32> [[VSHL_N]] 17132int32x4_t test_vshlq_n_s32(int32x4_t a) { 17133 return vshlq_n_s32(a, 1); 17134} 17135 17136// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 { 17137// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 17138// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 17139// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, 
i64 1> 17140// CHECK: ret <2 x i64> [[VSHL_N]] 17141int64x2_t test_vshlq_n_s64(int64x2_t a) { 17142 return vshlq_n_s64(a, 1); 17143} 17144 17145// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 { 17146// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17147// CHECK: ret <16 x i8> [[VSHL_N]] 17148uint8x16_t test_vshlq_n_u8(uint8x16_t a) { 17149 return vshlq_n_u8(a, 1); 17150} 17151 17152// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 { 17153// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 17154// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 17155// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 17156// CHECK: ret <8 x i16> [[VSHL_N]] 17157uint16x8_t test_vshlq_n_u16(uint16x8_t a) { 17158 return vshlq_n_u16(a, 1); 17159} 17160 17161// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 { 17162// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 17163// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 17164// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1> 17165// CHECK: ret <4 x i32> [[VSHL_N]] 17166uint32x4_t test_vshlq_n_u32(uint32x4_t a) { 17167 return vshlq_n_u32(a, 1); 17168} 17169 17170// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 { 17171// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 17172// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 17173// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1> 17174// CHECK: ret <2 x i64> [[VSHL_N]] 17175uint64x2_t test_vshlq_n_u64(uint64x2_t a) { 17176 return vshlq_n_u64(a, 1); 17177} 17178 17179 17180// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 { 17181// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 17182// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x 
i16> 17183// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 17184// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 17185// CHECK: ret <8 x i8> [[VSHRN_N]] 17186int8x8_t test_vshrn_n_s16(int16x8_t a) { 17187 return vshrn_n_s16(a, 1); 17188} 17189 17190// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 { 17191// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 17192// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 17193// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1> 17194// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 17195// CHECK: ret <4 x i16> [[VSHRN_N]] 17196int16x4_t test_vshrn_n_s32(int32x4_t a) { 17197 return vshrn_n_s32(a, 1); 17198} 17199 17200// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 { 17201// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 17202// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 17203// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1> 17204// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 17205// CHECK: ret <2 x i32> [[VSHRN_N]] 17206int32x2_t test_vshrn_n_s64(int64x2_t a) { 17207 return vshrn_n_s64(a, 1); 17208} 17209 17210// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 { 17211// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 17212// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 17213// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 17214// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 17215// CHECK: ret <8 x i8> [[VSHRN_N]] 17216uint8x8_t test_vshrn_n_u16(uint16x8_t a) { 17217 return vshrn_n_u16(a, 1); 17218} 17219 17220// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 { 17221// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 17222// CHECK: [[TMP1:%.*]] = bitcast 
<16 x i8> [[TMP0]] to <4 x i32> 17223// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1> 17224// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 17225// CHECK: ret <4 x i16> [[VSHRN_N]] 17226uint16x4_t test_vshrn_n_u32(uint32x4_t a) { 17227 return vshrn_n_u32(a, 1); 17228} 17229 17230// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 { 17231// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 17232// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 17233// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1> 17234// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 17235// CHECK: ret <2 x i32> [[VSHRN_N]] 17236uint32x2_t test_vshrn_n_u64(uint64x2_t a) { 17237 return vshrn_n_u64(a, 1); 17238} 17239 17240 17241// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 { 17242// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17243// CHECK: ret <8 x i8> [[VSHR_N]] 17244int8x8_t test_vshr_n_s8(int8x8_t a) { 17245 return vshr_n_s8(a, 1); 17246} 17247 17248// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 { 17249// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17250// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17251// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1> 17252// CHECK: ret <4 x i16> [[VSHR_N]] 17253int16x4_t test_vshr_n_s16(int16x4_t a) { 17254 return vshr_n_s16(a, 1); 17255} 17256 17257// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 { 17258// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17259// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17260// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1> 17261// CHECK: ret <2 x i32> [[VSHR_N]] 17262int32x2_t test_vshr_n_s32(int32x2_t a) { 17263 return vshr_n_s32(a, 1); 17264} 17265 17266// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x 
i64> %a) #0 { 17267// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 17268// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 17269// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1> 17270// CHECK: ret <1 x i64> [[VSHR_N]] 17271int64x1_t test_vshr_n_s64(int64x1_t a) { 17272 return vshr_n_s64(a, 1); 17273} 17274 17275// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 { 17276// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17277// CHECK: ret <8 x i8> [[VSHR_N]] 17278uint8x8_t test_vshr_n_u8(uint8x8_t a) { 17279 return vshr_n_u8(a, 1); 17280} 17281 17282// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 { 17283// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17284// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17285// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1> 17286// CHECK: ret <4 x i16> [[VSHR_N]] 17287uint16x4_t test_vshr_n_u16(uint16x4_t a) { 17288 return vshr_n_u16(a, 1); 17289} 17290 17291// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 { 17292// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17293// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17294// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1> 17295// CHECK: ret <2 x i32> [[VSHR_N]] 17296uint32x2_t test_vshr_n_u32(uint32x2_t a) { 17297 return vshr_n_u32(a, 1); 17298} 17299 17300// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 { 17301// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 17302// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 17303// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1> 17304// CHECK: ret <1 x i64> [[VSHR_N]] 17305uint64x1_t test_vshr_n_u64(uint64x1_t a) { 17306 return vshr_n_u64(a, 1); 17307} 17308 17309// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 { 17310// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, 
<i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17311// CHECK: ret <16 x i8> [[VSHR_N]] 17312int8x16_t test_vshrq_n_s8(int8x16_t a) { 17313 return vshrq_n_s8(a, 1); 17314} 17315 17316// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 { 17317// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 17318// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 17319// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 17320// CHECK: ret <8 x i16> [[VSHR_N]] 17321int16x8_t test_vshrq_n_s16(int16x8_t a) { 17322 return vshrq_n_s16(a, 1); 17323} 17324 17325// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 { 17326// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 17327// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 17328// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1> 17329// CHECK: ret <4 x i32> [[VSHR_N]] 17330int32x4_t test_vshrq_n_s32(int32x4_t a) { 17331 return vshrq_n_s32(a, 1); 17332} 17333 17334// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 { 17335// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 17336// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 17337// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1> 17338// CHECK: ret <2 x i64> [[VSHR_N]] 17339int64x2_t test_vshrq_n_s64(int64x2_t a) { 17340 return vshrq_n_s64(a, 1); 17341} 17342 17343// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 { 17344// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 17345// CHECK: ret <16 x i8> [[VSHR_N]] 17346uint8x16_t test_vshrq_n_u8(uint8x16_t a) { 17347 return vshrq_n_u8(a, 1); 17348} 17349 17350// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 { 17351// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> 
%a to <16 x i8> 17352// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 17353// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 17354// CHECK: ret <8 x i16> [[VSHR_N]] 17355uint16x8_t test_vshrq_n_u16(uint16x8_t a) { 17356 return vshrq_n_u16(a, 1); 17357} 17358 17359// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 { 17360// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 17361// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 17362// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1> 17363// CHECK: ret <4 x i32> [[VSHR_N]] 17364uint32x4_t test_vshrq_n_u32(uint32x4_t a) { 17365 return vshrq_n_u32(a, 1); 17366} 17367 17368// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 { 17369// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 17370// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 17371// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1> 17372// CHECK: ret <2 x i64> [[VSHR_N]] 17373uint64x2_t test_vshrq_n_u64(uint64x2_t a) { 17374 return vshrq_n_u64(a, 1); 17375} 17376 17377 17378// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { 17379// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 17380// CHECK: ret <8 x i8> [[VSLI_N]] 17381int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { 17382 return vsli_n_s8(a, b, 1); 17383} 17384 17385// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { 17386// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17387// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 17388// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17389// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 17390// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> 
@llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 17391// CHECK: ret <4 x i16> [[VSLI_N2]] 17392int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { 17393 return vsli_n_s16(a, b, 1); 17394} 17395 17396// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { 17397// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17398// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 17399// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17400// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 17401// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>) 17402// CHECK: ret <2 x i32> [[VSLI_N2]] 17403int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { 17404 return vsli_n_s32(a, b, 1); 17405} 17406 17407// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { 17408// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 17409// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 17410// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 17411// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 17412// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>) 17413// CHECK: ret <1 x i64> [[VSLI_N2]] 17414int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { 17415 return vsli_n_s64(a, b, 1); 17416} 17417 17418// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { 17419// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 17420// CHECK: ret <8 x i8> [[VSLI_N]] 17421uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { 17422 return vsli_n_u8(a, b, 1); 17423} 17424 17425// CHECK-LABEL: define <4 x i16> 
@test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { 17426// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17427// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 17428// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17429// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 17430// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 17431// CHECK: ret <4 x i16> [[VSLI_N2]] 17432uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { 17433 return vsli_n_u16(a, b, 1); 17434} 17435 17436// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { 17437// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 17438// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 17439// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 17440// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 17441// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>) 17442// CHECK: ret <2 x i32> [[VSLI_N2]] 17443uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { 17444 return vsli_n_u32(a, b, 1); 17445} 17446 17447// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { 17448// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 17449// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 17450// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 17451// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 17452// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>) 17453// CHECK: ret <1 x i64> [[VSLI_N2]] 17454uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { 17455 return vsli_n_u64(a, b, 1); 17456} 17457 17458// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x 
i8> %a, <8 x i8> %b) #0 { 17459// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 17460// CHECK: ret <8 x i8> [[VSLI_N]] 17461poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { 17462 return vsli_n_p8(a, b, 1); 17463} 17464 17465// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 { 17466// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 17467// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 17468// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 17469// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 17470// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 17471// CHECK: ret <4 x i16> [[VSLI_N2]] 17472poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { 17473 return vsli_n_p16(a, b, 1); 17474} 17475 17476// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { 17477// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 17478// CHECK: ret <16 x i8> [[VSLI_N]] 17479int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { 17480 return vsliq_n_s8(a, b, 1); 17481} 17482 17483// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { 17484// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 17485// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 17486// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 17487// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 17488// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 
1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 1);
}

// NOTE(review): the CHECK lines in this section are FileCheck expectations (see the
// RUN lines at the top of the file); they appear machine-generated — prefer
// regenerating them over hand-editing. Do not alter CHECK text or IR value names.

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 1);
}


// vsra_n/vsraq_n: accumulating shift-right — codegen lowers these to a plain
// ashr (signed) / lshr (unsigned) followed by add, with no NEON intrinsic call.

// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 1);
}


// vsri_n/vsriq_n: shift-right-insert reuses the same @llvm.arm.neon.vshiftins
// intrinsic as vsli, distinguished by the negative shift-amount vector (-1 here).

// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 1);
}


// vst1/vst1q stores. NOTE(review): 64-bit element stores are expected with
// `i32 4` alignment here, matching the apcs-gnu target-abi in the RUN line.

// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_s8(int8_t * a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_s16(int16_t * a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_s32(int32_t * a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_s64(int64_t * a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_f16(float16_t * a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* [[TMP0]], <4 x float> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_f32(float32_t * a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
  vst1q_p16(a, b);
}

// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
  vst1_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_s8(int8_t * a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_s16(int16_t * a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_s32(int32_t * a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_s64(int64_t * a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_f16(float16_t * a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* [[TMP0]], <2 x float> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_f32(float32_t * a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
  vst1_p16(a, b);
}


// vst1q_lane: 8/16/32-bit lanes lower to extractelement + scalar store;
// 64-bit lanes go through shufflevector + @llvm.arm.neon.vst1 instead.

// CHECK-LABEL: define void @test_vst1q_lane_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
  vst1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
  vst1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
  vst1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
  vst1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
  vst1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
  vst1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
  vst1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
  vst1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1q_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
  vst1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
// CHECK:
[[TMP4:%.*]] = bitcast i8* [[TMP0]] to float* 18276// CHECK: store float [[TMP3]], float* [[TMP4]], align 4 18277// CHECK: ret void 18278void test_vst1q_lane_f32(float32_t * a, float32x4_t b) { 18279 vst1q_lane_f32(a, b, 3); 18280} 18281 18282// CHECK-LABEL: define void @test_vst1q_lane_p8(i8* %a, <16 x i8> %b) #0 { 18283// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 18284// CHECK: store i8 [[TMP0]], i8* %a, align 1 18285// CHECK: ret void 18286void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) { 18287 vst1q_lane_p8(a, b, 15); 18288} 18289 18290// CHECK-LABEL: define void @test_vst1q_lane_p16(i16* %a, <8 x i16> %b) #0 { 18291// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 18292// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 18293// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 18294// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 18295// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 18296// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2 18297// CHECK: ret void 18298void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) { 18299 vst1q_lane_p16(a, b, 7); 18300} 18301 18302// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 { 18303// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 18304// CHECK: store i8 [[TMP0]], i8* %a, align 1 18305// CHECK: ret void 18306void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) { 18307 vst1_lane_u8(a, b, 7); 18308} 18309 18310// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 { 18311// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 18312// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 18313// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 18314// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 18315// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 18316// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2 18317// CHECK: ret void 18318void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) { 
18319 vst1_lane_u16(a, b, 3); 18320} 18321 18322// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 { 18323// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 18324// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 18325// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 18326// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 18327// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32* 18328// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4 18329// CHECK: ret void 18330void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) { 18331 vst1_lane_u32(a, b, 1); 18332} 18333 18334// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 { 18335// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 18336// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 18337// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 18338// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 18339// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 18340// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4 18341// CHECK: ret void 18342void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) { 18343 vst1_lane_u64(a, b, 0); 18344} 18345 18346// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 { 18347// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 18348// CHECK: store i8 [[TMP0]], i8* %a, align 1 18349// CHECK: ret void 18350void test_vst1_lane_s8(int8_t * a, int8x8_t b) { 18351 vst1_lane_s8(a, b, 7); 18352} 18353 18354// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 { 18355// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 18356// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 18357// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 18358// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 18359// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 18360// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2 18361// CHECK: ret void 
18362void test_vst1_lane_s16(int16_t * a, int16x4_t b) { 18363 vst1_lane_s16(a, b, 3); 18364} 18365 18366// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 { 18367// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 18368// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 18369// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 18370// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 18371// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32* 18372// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4 18373// CHECK: ret void 18374void test_vst1_lane_s32(int32_t * a, int32x2_t b) { 18375 vst1_lane_s32(a, b, 1); 18376} 18377 18378// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 { 18379// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 18380// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 18381// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 18382// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 18383// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 18384// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4 18385// CHECK: ret void 18386void test_vst1_lane_s64(int64_t * a, int64x1_t b) { 18387 vst1_lane_s64(a, b, 0); 18388} 18389 18390// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 { 18391// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 18392// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 18393// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 18394// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 18395// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 18396// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2 18397// CHECK: ret void 18398void test_vst1_lane_f16(float16_t * a, float16x4_t b) { 18399 vst1_lane_f16(a, b, 3); 18400} 18401 18402// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 { 18403// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 18404// CHECK: 
[[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 18405// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 18406// CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 18407// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float* 18408// CHECK: store float [[TMP3]], float* [[TMP4]], align 4 18409// CHECK: ret void 18410void test_vst1_lane_f32(float32_t * a, float32x2_t b) { 18411 vst1_lane_f32(a, b, 1); 18412} 18413 18414// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 { 18415// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 18416// CHECK: store i8 [[TMP0]], i8* %a, align 1 18417// CHECK: ret void 18418void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) { 18419 vst1_lane_p8(a, b, 7); 18420} 18421 18422// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 { 18423// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 18424// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 18425// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 18426// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 18427// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 18428// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2 18429// CHECK: ret void 18430void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) { 18431 vst1_lane_p16(a, b, 3); 18432} 18433 18434 18435// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [4 x i64] %b.coerce) #0 { 18436// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 18437// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 18438// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 18439// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 18440// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18441// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* 18442// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* 
18443// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18444// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 18445// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 18446// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 18447// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 18448// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1 18449// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 18450// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1) 18451// CHECK: ret void 18452void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) { 18453 vst2q_u8(a, b); 18454} 18455 18456// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [4 x i64] %b.coerce) #0 { 18457// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 18458// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 18459// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 18460// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 18461// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18462// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* 18463// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* 18464// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18465// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18466// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 18467// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 
18468// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 18469// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 18470// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 18471// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 18472// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 18473// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 18474// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 18475// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 18476// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2) 18477// CHECK: ret void 18478void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) { 18479 vst2q_u16(a, b); 18480} 18481 18482// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [4 x i64] %b.coerce) #0 { 18483// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 18484// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 18485// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 18486// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 18487// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18488// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 18489// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 18490// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18491// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 18492// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 18493// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 18494// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 
x i32>* [[ARRAYIDX]], align 16 18495// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 18496// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 18497// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 18498// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 18499// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 18500// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 18501// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 18502// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4) 18503// CHECK: ret void 18504void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) { 18505 vst2q_u32(a, b); 18506} 18507 18508// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [4 x i64] %b.coerce) #0 { 18509// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 18510// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 18511// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 18512// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 18513// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18514// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* 18515// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* 18516// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18517// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 18518// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 18519// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 18520// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, 
%struct.int8x16x2_t* [[__S1]], i32 0, i32 0 18521// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1 18522// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 18523// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1) 18524// CHECK: ret void 18525void test_vst2q_s8(int8_t * a, int8x16x2_t b) { 18526 vst2q_s8(a, b); 18527} 18528 18529// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [4 x i64] %b.coerce) #0 { 18530// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 18531// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 18532// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 18533// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 18534// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18535// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 18536// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* 18537// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18538// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18539// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 18540// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 18541// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 18542// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 18543// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 18544// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 18545// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 18546// CHECK: [[TMP7:%.*]] = bitcast 
<8 x i16> [[TMP6]] to <16 x i8> 18547// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 18548// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 18549// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2) 18550// CHECK: ret void 18551void test_vst2q_s16(int16_t * a, int16x8x2_t b) { 18552 vst2q_s16(a, b); 18553} 18554 18555// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [4 x i64] %b.coerce) #0 { 18556// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 18557// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 18558// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 18559// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 18560// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18561// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* 18562// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* 18563// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18564// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 18565// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 18566// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 18567// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 18568// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 18569// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 18570// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 18571// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 18572// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 18573// CHECK: [[TMP8:%.*]] 
= bitcast <16 x i8> [[TMP5]] to <4 x i32> 18574// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 18575// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4) 18576// CHECK: ret void 18577void test_vst2q_s32(int32_t * a, int32x4x2_t b) { 18578 vst2q_s32(a, b); 18579} 18580 18581// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [4 x i64] %b.coerce) #0 { 18582// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 18583// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 18584// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 18585// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]* 18586// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18587// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* 18588// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* 18589// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18590// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 18591// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 18592// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 18593// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 18594// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 18595// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 18596// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1 18597// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 18598// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 18599// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> 
[[TMP5]] to <8 x i16> 18600// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 18601// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2) 18602// CHECK: ret void 18603void test_vst2q_f16(float16_t * a, float16x8x2_t b) { 18604 vst2q_f16(a, b); 18605} 18606 18607// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [4 x i64] %b.coerce) #0 { 18608// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 18609// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 18610// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 18611// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]* 18612// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18613// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* 18614// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* 18615// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18616// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 18617// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 18618// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 18619// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 18620// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 18621// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 18622// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1 18623// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 18624// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 18625// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] 
to <4 x float> 18626// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 18627// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4) 18628// CHECK: ret void 18629void test_vst2q_f32(float32_t * a, float32x4x2_t b) { 18630 vst2q_f32(a, b); 18631} 18632 18633// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [4 x i64] %b.coerce) #0 { 18634// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 18635// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 18636// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 18637// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 18638// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18639// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* 18640// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* 18641// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18642// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 18643// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0 18644// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 18645// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 18646// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1 18647// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 18648// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1) 18649// CHECK: ret void 18650void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) { 18651 vst2q_p8(a, b); 18652} 18653 18654// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [4 x 
i64] %b.coerce) #0 { 18655// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 18656// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 18657// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 18658// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 18659// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18660// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* 18661// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* 18662// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18663// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18664// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 18665// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 18666// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 18667// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 18668// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 18669// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 18670// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 18671// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 18672// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 18673// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 18674// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2) 18675// CHECK: ret void 18676void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) { 18677 vst2q_p16(a, b); 18678} 18679 18680// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x i64] %b.coerce) #0 { 18681// CHECK: [[B:%.*]] = 
alloca %struct.uint8x8x2_t, align 8 18682// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 18683// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 18684// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 18685// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18686// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* 18687// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* 18688// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18689// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 18690// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 18691// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 18692// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 18693// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 18694// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 18695// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1) 18696// CHECK: ret void 18697void test_vst2_u8(uint8_t * a, uint8x8x2_t b) { 18698 vst2_u8(a, b); 18699} 18700 18701// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x i64] %b.coerce) #0 { 18702// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 18703// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 18704// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 18705// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 18706// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18707// CHECK: [[TMP1:%.*]] = 
bitcast %struct.uint16x4x2_t* [[__S1]] to i8* 18708// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* 18709// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18710// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18711// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 18712// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 18713// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 18714// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 18715// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 18716// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 18717// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 18718// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 18719// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 18720// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 18721// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2) 18722// CHECK: ret void 18723void test_vst2_u16(uint16_t * a, uint16x4x2_t b) { 18724 vst2_u16(a, b); 18725} 18726 18727// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x i64] %b.coerce) #0 { 18728// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 18729// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 18730// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 18731// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* 18732// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18733// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* 18734// CHECK: 
[[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* 18735// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18736// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 18737// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 18738// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 18739// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 18740// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 18741// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 18742// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1 18743// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 18744// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 18745// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 18746// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 18747// CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4) 18748// CHECK: ret void 18749void test_vst2_u32(uint32_t * a, uint32x2x2_t b) { 18750 vst2_u32(a, b); 18751} 18752 18753// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x i64] %b.coerce) #0 { 18754// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 18755// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 18756// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 18757// CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]* 18758// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18759// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* 18760// CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* 
18761// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18762// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 18763// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 18764// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0 18765// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 18766// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 18767// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 18768// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1 18769// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 18770// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 18771// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 18772// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 18773// CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4) 18774// CHECK: ret void 18775void test_vst2_u64(uint64_t * a, uint64x1x2_t b) { 18776 vst2_u64(a, b); 18777} 18778 18779// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x i64] %b.coerce) #0 { 18780// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 18781// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 18782// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 18783// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 18784// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18785// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* 18786// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* 18787// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* 
[[TMP2]], i32 16, i32 8, i1 false) 18788// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 18789// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 18790// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 18791// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 18792// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 18793// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 18794// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1) 18795// CHECK: ret void 18796void test_vst2_s8(int8_t * a, int8x8x2_t b) { 18797 vst2_s8(a, b); 18798} 18799 18800// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x i64] %b.coerce) #0 { 18801// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 18802// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 18803// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 18804// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 18805// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18806// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* 18807// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* 18808// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18809// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18810// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 18811// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 18812// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 18813// CHECK: [[TMP5:%.*]] = 
bitcast <4 x i16> [[TMP4]] to <8 x i8> 18814// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 18815// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 18816// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 18817// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 18818// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 18819// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 18820// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2) 18821// CHECK: ret void 18822void test_vst2_s16(int16_t * a, int16x4x2_t b) { 18823 vst2_s16(a, b); 18824} 18825 18826// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x i64] %b.coerce) #0 { 18827// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 18828// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 18829// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 18830// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* 18831// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18832// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* 18833// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* 18834// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18835// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 18836// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 18837// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 18838// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 18839// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 18840// CHECK: [[VAL1:%.*]] = 
getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 18841// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1 18842// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 18843// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 18844// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 18845// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 18846// CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4) 18847// CHECK: ret void 18848void test_vst2_s32(int32_t * a, int32x2x2_t b) { 18849 vst2_s32(a, b); 18850} 18851 18852// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x i64] %b.coerce) #0 { 18853// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 18854// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 18855// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 18856// CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]* 18857// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18858// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* 18859// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* 18860// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18861// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 18862// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 18863// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0 18864// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 18865// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 18866// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* 
[[__S1]], i32 0, i32 0 18867// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1 18868// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 18869// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 18870// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 18871// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 18872// CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4) 18873// CHECK: ret void 18874void test_vst2_s64(int64_t * a, int64x1x2_t b) { 18875 vst2_s64(a, b); 18876} 18877 18878// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x i64] %b.coerce) #0 { 18879// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 18880// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 18881// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 18882// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]* 18883// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18884// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* 18885// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* 18886// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18887// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 18888// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 18889// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0 18890// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 18891// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 18892// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 18893// CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1 18894// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 18895// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 18896// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 18897// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 18898// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2) 18899// CHECK: ret void 18900void test_vst2_f16(float16_t * a, float16x4x2_t b) { 18901 vst2_f16(a, b); 18902} 18903 18904// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x i64] %b.coerce) #0 { 18905// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 18906// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 18907// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 18908// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]* 18909// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18910// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* 18911// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* 18912// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18913// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 18914// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 18915// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0 18916// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 18917// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 18918// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 18919// CHECK: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1 18920// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 18921// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 18922// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 18923// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 18924// CHECK: call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4) 18925// CHECK: ret void 18926void test_vst2_f32(float32_t * a, float32x2x2_t b) { 18927 vst2_f32(a, b); 18928} 18929 18930// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x i64] %b.coerce) #0 { 18931// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 18932// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 18933// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 18934// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 18935// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18936// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* 18937// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* 18938// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18939// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 18940// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 18941// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 18942// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 18943// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 18944// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 18945// CHECK: call void 
@llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1) 18946// CHECK: ret void 18947void test_vst2_p8(poly8_t * a, poly8x8x2_t b) { 18948 vst2_p8(a, b); 18949} 18950 18951// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x i64] %b.coerce) #0 { 18952// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 18953// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 18954// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 18955// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 18956// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18957// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* 18958// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* 18959// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18960// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18961// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 18962// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 18963// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 18964// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 18965// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 18966// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 18967// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 18968// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 18969// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 18970// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 18971// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> 
[[TMP9]], i32 2) 18972// CHECK: ret void 18973void test_vst2_p16(poly16_t * a, poly16x4x2_t b) { 18974 vst2_p16(a, b); 18975} 18976 18977 18978// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 { 18979// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 18980// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 18981// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 18982// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 18983// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 18984// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* 18985// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* 18986// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 18987// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18988// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 18989// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 18990// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 18991// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 18992// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 18993// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 18994// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 18995// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 18996// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 18997// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 18998// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2) 18999// 
CHECK: ret void 19000void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) { 19001 vst2q_lane_u16(a, b, 7); 19002} 19003 19004// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 { 19005// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 19006// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 19007// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 19008// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 19009// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 19010// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 19011// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 19012// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 19013// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 19014// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 19015// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 19016// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 19017// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 19018// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 19019// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 19020// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 19021// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 19022// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 19023// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 19024// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4) 19025// CHECK: ret void 
19026void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) { 19027 vst2q_lane_u32(a, b, 3); 19028} 19029 19030// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 { 19031// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 19032// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 19033// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 19034// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 19035// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 19036// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 19037// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* 19038// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 19039// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 19040// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 19041// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 19042// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 19043// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 19044// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 19045// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 19046// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 19047// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 19048// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 19049// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 19050// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2) 19051// CHECK: ret void 19052void 
test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) { 19053 vst2q_lane_s16(a, b, 7); 19054} 19055 19056// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 { 19057// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 19058// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 19059// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 19060// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 19061// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 19062// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* 19063// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* 19064// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 19065// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 19066// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 19067// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 19068// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 19069// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 19070// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 19071// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 19072// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 19073// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 19074// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 19075// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 19076// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4) 19077// CHECK: ret void 19078void test_vst2q_lane_s32(int32_t * 
a, int32x4x2_t b) { 19079 vst2q_lane_s32(a, b, 3); 19080} 19081 19082// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [4 x i64] %b.coerce) #0 { 19083// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 19084// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 19085// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 19086// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]* 19087// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 19088// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* 19089// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* 19090// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 19091// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 19092// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 19093// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0 19094// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 19095// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 19096// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 19097// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1 19098// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 19099// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 19100// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 19101// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 19102// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2) 19103// CHECK: ret void 19104void 
test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) { 19105 vst2q_lane_f16(a, b, 7); 19106} 19107 19108// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [4 x i64] %b.coerce) #0 { 19109// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 19110// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 19111// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 19112// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]* 19113// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 19114// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* 19115// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* 19116// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 19117// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 19118// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 19119// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0 19120// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 19121// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 19122// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 19123// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1 19124// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 19125// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 19126// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 19127// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 19128// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4) 19129// 
CHECK: ret void 19130void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) { 19131 vst2q_lane_f32(a, b, 3); 19132} 19133 19134// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 { 19135// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 19136// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 19137// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 19138// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 19139// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 19140// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* 19141// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* 19142// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 19143// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 19144// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 19145// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 19146// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 19147// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 19148// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 19149// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 19150// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 19151// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 19152// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 19153// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 19154// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2) 19155// CHECK: ret void 
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_lane_p16(a, b, 7);
}

// vst2_lane_u8: interleaved store of lane 7 from both <8 x i8> halves of a uint8x8x2_t;
// byte elements need no bitcast, so the loads feed the intrinsic directly.
// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK: ret void
void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_lane_u8(a, b, 7);
}

// vst2_lane_u16: lane-3 store; <4 x i16> halves are round-tripped through <8 x i8>.
// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_lane_u16(a, b, 3);
}

// vst2_lane_u32: lane-1 store; final intrinsic argument (i32 4) is the element alignment.
// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK: ret void
void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_lane_u32(a, b, 1);
}

// vst2_lane_s8: same lowering as the u8 variant — vst2lane is sign-agnostic (v8i8, lane 7).
// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK: ret void
void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
  vst2_lane_s8(a, b, 7);
}

// vst2_lane_s16: lane-3 store; same v4i16 lowering as the u16 variant.
// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
  vst2_lane_s16(a, b, 3);
}

// vst2_lane_s32: lane-1 store; same v2i32 lowering as the u32 variant.
// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK: ret void
void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
  vst2_lane_s32(a, b, 1);
}

// vst2_lane_f16: half-precision lanes are stored via the v4i16 intrinsic (no f16 variant).
// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
  vst2_lane_f16(a, b, 3);
}

// vst2_lane_f32: lane-1 store using the v2f32 intrinsic variant.
// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
// CHECK: ret void
void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
  vst2_lane_f32(a, b, 1);
}

// vst2_lane_p8: polynomial bytes use the same v8i8 lowering as u8/s8 (lane 7).
// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK: ret void
void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_lane_p8(a, b, 7);
}

// vst2_lane_p16 (64-bit D-register form): lane-3 store via the v4i16 intrinsic.
// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_lane_p16(a, b, 3);
}


// vst3q_u8: full 3-register interleaved store of a uint8x16x3_t; the [6 x i64]
// coerced argument is spilled and copied before the llvm.arm.neon.vst3 call.
// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}

// vst3q_u16: 3-register store; each <8 x i16> is round-tripped through <16 x i8>.
// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}

// vst3q_u32: 3-register store of <4 x i32> vectors with 4-byte alignment argument.
// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}

// vst3q_s8: same v16i8 lowering as the u8 variant (vst3 is sign-agnostic).
// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
  vst3q_s8(a, b);
}

// vst3q_s16: same v8i16 lowering as the u16 variant.
// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
  vst3q_s16(a, b);
}

// vst3q_s32: same v4i32 lowering as the u32 variant.
// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
  vst3q_s32(a, b);
}

// vst3q_f16: half vectors are stored via the v8i16 intrinsic (no f16 variant).
// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
  vst3q_f16(a, b);
}

// vst3q_f32: 3-register store using the v4f32 intrinsic variant.
// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

// vst3q_p8: same v16i8 lowering as the u8/s8 variants.
// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

// vst3q_p16: expectations continue past this chunk boundary.
// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK: call
void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 19673// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 19674// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 19675// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 19676// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 19677// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 19678// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 19679// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 19680// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 19681// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 19682// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 19683// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 19684// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 19685// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 19686// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 19687// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 19688// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 19689// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2) 19690// CHECK: ret void 19691void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) { 19692 vst3q_p16(a, b); 19693} 19694 19695// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x i64] %b.coerce) #0 { 19696// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 19697// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 19698// CHECK: 
[[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 19699// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 19700// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19701// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 19702// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 19703// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19704// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 19705// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 19706// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 19707// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 19708// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 19709// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 19710// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 19711// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 19712// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 19713// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1) 19714// CHECK: ret void 19715void test_vst3_u8(uint8_t * a, uint8x8x3_t b) { 19716 vst3_u8(a, b); 19717} 19718 19719// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x i64] %b.coerce) #0 { 19720// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 19721// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 19722// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* 
[[B]], i32 0, i32 0 19723// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 19724// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19725// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 19726// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 19727// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19728// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 19729// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 19730// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 19731// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 19732// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 19733// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 19734// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 19735// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 19736// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 19737// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 19738// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 19739// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 19740// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 19741// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 19742// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 19743// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 19744// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2) 19745// CHECK: 
ret void 19746void test_vst3_u16(uint16_t * a, uint16x4x3_t b) { 19747 vst3_u16(a, b); 19748} 19749 19750// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x i64] %b.coerce) #0 { 19751// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 19752// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 19753// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 19754// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 19755// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19756// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 19757// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 19758// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19759// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 19760// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 19761// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 19762// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 19763// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 19764// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 19765// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1 19766// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 19767// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 19768// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 19769// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2 19770// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 19771// CHECK: 
[[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 19772// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 19773// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 19774// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 19775// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4) 19776// CHECK: ret void 19777void test_vst3_u32(uint32_t * a, uint32x2x3_t b) { 19778 vst3_u32(a, b); 19779} 19780 19781// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x i64] %b.coerce) #0 { 19782// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 19783// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 19784// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 19785// CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]* 19786// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19787// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* 19788// CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* 19789// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19790// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 19791// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 19792// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0 19793// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 19794// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 19795// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 19796// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1 19797// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* 
[[ARRAYIDX2]], align 8 19798// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 19799// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 19800// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2 19801// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 19802// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 19803// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 19804// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 19805// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 19806// CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4) 19807// CHECK: ret void 19808void test_vst3_u64(uint64_t * a, uint64x1x3_t b) { 19809 vst3_u64(a, b); 19810} 19811 19812// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x i64] %b.coerce) #0 { 19813// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 19814// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 19815// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 19816// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 19817// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19818// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 19819// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 19820// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19821// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 19822// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 19823// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 19824// CHECK: 
[[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 19825// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 19826// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 19827// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 19828// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 19829// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 19830// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1) 19831// CHECK: ret void 19832void test_vst3_s8(int8_t * a, int8x8x3_t b) { 19833 vst3_s8(a, b); 19834} 19835 19836// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x i64] %b.coerce) #0 { 19837// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 19838// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 19839// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 19840// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 19841// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19842// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 19843// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 19844// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19845// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 19846// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 19847// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 19848// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 19849// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] 
to <8 x i8> 19850// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 19851// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 19852// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 19853// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 19854// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 19855// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 19856// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 19857// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 19858// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 19859// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 19860// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 19861// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2) 19862// CHECK: ret void 19863void test_vst3_s16(int16_t * a, int16x4x3_t b) { 19864 vst3_s16(a, b); 19865} 19866 19867// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x i64] %b.coerce) #0 { 19868// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 19869// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 19870// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 19871// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 19872// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19873// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* 19874// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* 19875// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19876// CHECK: 
[[TMP3:%.*]] = bitcast i32* %a to i8* 19877// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 19878// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 19879// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 19880// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 19881// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 19882// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1 19883// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 19884// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 19885// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 19886// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2 19887// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 19888// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 19889// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 19890// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 19891// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 19892// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4) 19893// CHECK: ret void 19894void test_vst3_s32(int32_t * a, int32x2x3_t b) { 19895 vst3_s32(a, b); 19896} 19897 19898// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x i64] %b.coerce) #0 { 19899// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 19900// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 19901// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 19902// CHECK: 
[[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]* 19903// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19904// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* 19905// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* 19906// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19907// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 19908// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 19909// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0 19910// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 19911// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 19912// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 19913// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1 19914// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 19915// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 19916// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 19917// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2 19918// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 19919// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 19920// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 19921// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 19922// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 19923// CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4) 19924// CHECK: ret void 19925void test_vst3_s64(int64_t * 
a, int64x1x3_t b) { 19926 vst3_s64(a, b); 19927} 19928 19929// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x i64] %b.coerce) #0 { 19930// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 19931// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 19932// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 19933// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]* 19934// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19935// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* 19936// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* 19937// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19938// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 19939// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 19940// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0 19941// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 19942// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 19943// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 19944// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1 19945// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 19946// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 19947// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 19948// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2 19949// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 19950// CHECK: [[TMP9:%.*]] = 
bitcast <4 x half> [[TMP8]] to <8 x i8> 19951// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 19952// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 19953// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 19954// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2) 19955// CHECK: ret void 19956void test_vst3_f16(float16_t * a, float16x4x3_t b) { 19957 vst3_f16(a, b); 19958} 19959 19960// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x i64] %b.coerce) #0 { 19961// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 19962// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 19963// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 19964// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]* 19965// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19966// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* 19967// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* 19968// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 19969// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 19970// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 19971// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0 19972// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 19973// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 19974// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 19975// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1 19976// CHECK: [[TMP6:%.*]] = load <2 x 
float>, <2 x float>* [[ARRAYIDX2]], align 8 19977// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 19978// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 19979// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2 19980// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 19981// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 19982// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 19983// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 19984// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 19985// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4) 19986// CHECK: ret void 19987void test_vst3_f32(float32_t * a, float32x2x3_t b) { 19988 vst3_f32(a, b); 19989} 19990 19991// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x i64] %b.coerce) #0 { 19992// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 19993// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 19994// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 19995// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 19996// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 19997// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* 19998// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* 19999// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20000// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 20001// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 20002// CHECK: [[TMP3:%.*]] = load <8 x 
i8>, <8 x i8>* [[ARRAYIDX]], align 8 20003// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 20004// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 20005// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 20006// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 20007// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 20008// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 20009// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1) 20010// CHECK: ret void 20011void test_vst3_p8(poly8_t * a, poly8x8x3_t b) { 20012 vst3_p8(a, b); 20013} 20014 20015// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x i64] %b.coerce) #0 { 20016// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 20017// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 20018// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 20019// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 20020// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20021// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* 20022// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* 20023// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20024// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20025// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 20026// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 20027// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], 
align 8 20028// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 20029// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 20030// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 20031// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 20032// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 20033// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 20034// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 20035// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 20036// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 20037// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 20038// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 20039// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 20040// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2) 20041// CHECK: ret void 20042void test_vst3_p16(poly16_t * a, poly16x4x3_t b) { 20043 vst3_p16(a, b); 20044} 20045 20046 20047// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [6 x i64] %b.coerce) #0 { 20048// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 20049// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 20050// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 20051// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* 20052// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20053// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* 20054// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* 20055// CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20056// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20057// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 20058// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 20059// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 20060// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 20061// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 20062// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 20063// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 20064// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 20065// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 20066// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 20067// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 20068// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 20069// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20070// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20071// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20072// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2) 20073// CHECK: ret void 20074void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) { 20075 vst3q_lane_u16(a, b, 7); 20076} 20077 20078// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [6 x i64] %b.coerce) #0 { 20079// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 20080// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, 
align 16 20081// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 20082// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* 20083// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20084// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* 20085// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* 20086// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20087// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20088// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 20089// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 20090// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 20091// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 20092// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 20093// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 20094// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 20095// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 20096// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 20097// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 20098// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 20099// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 20100// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 20101// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 20102// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 20103// CHECK: call void 
@llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4) 20104// CHECK: ret void 20105void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) { 20106 vst3q_lane_u32(a, b, 3); 20107} 20108 20109// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [6 x i64] %b.coerce) #0 { 20110// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 20111// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 20112// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 20113// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* 20114// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20115// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* 20116// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* 20117// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20118// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20119// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 20120// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 20121// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 20122// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 20123// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 20124// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 20125// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 20126// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 20127// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 20128// CHECK: [[ARRAYIDX4:%.*]] = getelementptr 
inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 20129// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 20130// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 20131// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20132// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20133// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20134// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2) 20135// CHECK: ret void 20136void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) { 20137 vst3q_lane_s16(a, b, 7); 20138} 20139 20140// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [6 x i64] %b.coerce) #0 { 20141// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 20142// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 20143// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 20144// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* 20145// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20146// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* 20147// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* 20148// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20149// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20150// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 20151// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0 20152// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 20153// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 20154// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* 
[[__S1]], i32 0, i32 0 20155// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1 20156// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 20157// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 20158// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 20159// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2 20160// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 20161// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 20162// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 20163// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 20164// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 20165// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4) 20166// CHECK: ret void 20167void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) { 20168 vst3q_lane_s32(a, b, 3); 20169} 20170 20171// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [6 x i64] %b.coerce) #0 { 20172// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 20173// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 20174// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 20175// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]* 20176// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20177// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* 20178// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* 20179// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20180// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 20181// 
CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 20182// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 20183// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 20184// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 20185// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 20186// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1 20187// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 20188// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 20189// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 20190// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2 20191// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 20192// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 20193// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20194// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20195// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20196// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2) 20197// CHECK: ret void 20198void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) { 20199 vst3q_lane_f16(a, b, 7); 20200} 20201 20202// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [6 x i64] %b.coerce) #0 { 20203// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 20204// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 20205// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* 
[[B]], i32 0, i32 0 20206// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]* 20207// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20208// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* 20209// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* 20210// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20211// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 20212// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 20213// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 20214// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 20215// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 20216// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 20217// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1 20218// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 20219// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 20220// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 20221// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2 20222// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 20223// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> 20224// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 20225// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 20226// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> 20227// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], 
<4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4) 20228// CHECK: ret void 20229void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) { 20230 vst3q_lane_f32(a, b, 3); 20231} 20232 20233// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [6 x i64] %b.coerce) #0 { 20234// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 20235// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 20236// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 20237// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* 20238// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20239// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* 20240// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* 20241// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20242// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20243// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 20244// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 20245// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 20246// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 20247// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 20248// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 20249// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 20250// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 20251// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 20252// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 
0, i32 2 20253// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 20254// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 20255// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20256// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20257// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20258// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2) 20259// CHECK: ret void 20260void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) { 20261 vst3q_lane_p16(a, b, 7); 20262} 20263 20264// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x i64] %b.coerce) #0 { 20265// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 20266// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 20267// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 20268// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 20269// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20270// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 20271// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 20272// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20273// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 20274// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 20275// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 20276// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 20277// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 20278// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* 
[[ARRAYIDX2]], align 8 20279// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 20280// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 20281// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 20282// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 20283// CHECK: ret void 20284void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) { 20285 vst3_lane_u8(a, b, 7); 20286} 20287 20288// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x i64] %b.coerce) #0 { 20289// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 20290// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 20291// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 20292// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 20293// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20294// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 20295// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 20296// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20297// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20298// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 20299// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 20300// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 20301// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 20302// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 20303// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* 
[[VAL1]], i32 0, i32 1 20304// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 20305// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 20306// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 20307// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 20308// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 20309// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 20310// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 20311// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 20312// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 20313// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2) 20314// CHECK: ret void 20315void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) { 20316 vst3_lane_u16(a, b, 3); 20317} 20318 20319// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x i64] %b.coerce) #0 { 20320// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 20321// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 20322// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 20323// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 20324// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20325// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 20326// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 20327// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20328// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20329// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 20330// CHECK: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 20331// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 20332// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 20333// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 20334// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1 20335// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 20336// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 20337// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 20338// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2 20339// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 20340// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 20341// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 20342// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 20343// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 20344// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4) 20345// CHECK: ret void 20346void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) { 20347 vst3_lane_u32(a, b, 1); 20348} 20349 20350// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x i64] %b.coerce) #0 { 20351// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 20352// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 20353// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 20354// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 20355// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 
20356// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 20357// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 20358// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20359// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 20360// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 20361// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 20362// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 20363// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 20364// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 20365// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 20366// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 20367// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 20368// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 20369// CHECK: ret void 20370void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) { 20371 vst3_lane_s8(a, b, 7); 20372} 20373 20374// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x i64] %b.coerce) #0 { 20375// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 20376// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 20377// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 20378// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 20379// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20380// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 
20381// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 20382// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20383// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20384// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 20385// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 20386// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 20387// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 20388// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 20389// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 20390// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 20391// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 20392// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 20393// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 20394// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 20395// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 20396// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 20397// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 20398// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 20399// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2) 20400// CHECK: ret void 20401void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) { 20402 vst3_lane_s16(a, b, 3); 20403} 20404 20405// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x i64] %b.coerce) #0 { 20406// CHECK: [[B:%.*]] = alloca 
%struct.int32x2x3_t, align 8 20407// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 20408// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 20409// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 20410// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20411// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* 20412// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* 20413// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20414// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20415// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 20416// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 20417// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 20418// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 20419// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 20420// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1 20421// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 20422// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 20423// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 20424// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2 20425// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 20426// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 20427// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 20428// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 20429// CHECK: [[TMP12:%.*]] = 
bitcast <8 x i8> [[TMP9]] to <2 x i32> 20430// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4) 20431// CHECK: ret void 20432void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) { 20433 vst3_lane_s32(a, b, 1); 20434} 20435 20436// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x i64] %b.coerce) #0 { 20437// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 20438// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 20439// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 20440// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]* 20441// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20442// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* 20443// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* 20444// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20445// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 20446// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 20447// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0 20448// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 20449// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 20450// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 20451// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1 20452// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 20453// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 20454// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, 
%struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
  vst3_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
// CHECK: ret void
void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
  vst3_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: ret void
void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_lane_p16(a, b, 3);
}


// Tests for the vst4q_* intrinsics: each is expected to lower to a single
// @llvm.arm.neon.vst4 call after the [8 x i64] argument coercion is undone.
// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

// Tests for the 64-bit vst4_* intrinsics ([4 x i64] coercion, 8-byte alignment).
// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_u8(a, b);
}

// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
20970// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 20971// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 20972// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 20973// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3 20974// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 20975// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 20976// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 20977// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 20978// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 20979// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 20980// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4) 20981// CHECK: ret void 20982void test_vst4_u32(uint32_t * a, uint32x2x4_t b) { 20983 vst4_u32(a, b); 20984} 20985 20986// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x i64] %b.coerce) #0 { 20987// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 20988// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 20989// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0 20990// CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]* 20991// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 20992// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* 20993// CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* 20994// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 20995// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 20996// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* 
[[__S1]], i32 0, i32 0 20997// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0 20998// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 20999// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 21000// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 21001// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1 21002// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 21003// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 21004// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 21005// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2 21006// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 21007// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 21008// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 21009// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3 21010// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 21011// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 21012// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 21013// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 21014// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 21015// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 21016// CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4) 21017// CHECK: ret void 21018void test_vst4_u64(uint64_t * a, uint64x1x4_t b) { 21019 vst4_u64(a, b); 21020} 21021 
21022// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x i64] %b.coerce) #0 { 21023// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 21024// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 21025// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 21026// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 21027// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21028// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* 21029// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* 21030// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21031// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 21032// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 21033// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 21034// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 21035// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1 21036// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 21037// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 21038// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2 21039// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 21040// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 21041// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 21042// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 21043// CHECK: call void 
@llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1) 21044// CHECK: ret void 21045void test_vst4_s8(int8_t * a, int8x8x4_t b) { 21046 vst4_s8(a, b); 21047} 21048 21049// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x i64] %b.coerce) #0 { 21050// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 21051// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 21052// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 21053// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 21054// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21055// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* 21056// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* 21057// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21058// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21059// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21060// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 21061// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 21062// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 21063// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21064// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1 21065// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 21066// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 21067// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21068// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* 
[[VAL3]], i32 0, i32 2 21069// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 21070// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 21071// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21072// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 21073// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 21074// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 21075// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21076// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21077// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21078// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21079// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2) 21080// CHECK: ret void 21081void test_vst4_s16(int16_t * a, int16x4x4_t b) { 21082 vst4_s16(a, b); 21083} 21084 21085// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x i64] %b.coerce) #0 { 21086// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 21087// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 21088// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 21089// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 21090// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21091// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* 21092// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 21093// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21094// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 21095// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, 
%struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21096// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 21097// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 21098// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 21099// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21100// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1 21101// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 21102// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 21103// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21104// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2 21105// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 21106// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 21107// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21108// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3 21109// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 21110// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 21111// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 21112// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 21113// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 21114// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 21115// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4) 21116// CHECK: ret void 21117void test_vst4_s32(int32_t * a, int32x2x4_t b) { 21118 vst4_s32(a, b); 
21119} 21120 21121// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x i64] %b.coerce) #0 { 21122// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 21123// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 21124// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 21125// CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]* 21126// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21127// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* 21128// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* 21129// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21130// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 21131// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 21132// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0 21133// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 21134// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 21135// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 21136// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1 21137// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 21138// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 21139// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 21140// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2 21141// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 21142// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 21143// CHECK: [[VAL5:%.*]] = getelementptr 
inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 21144// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3 21145// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 21146// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 21147// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 21148// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 21149// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 21150// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 21151// CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4) 21152// CHECK: ret void 21153void test_vst4_s64(int64_t * a, int64x1x4_t b) { 21154 vst4_s64(a, b); 21155} 21156 21157// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x i64] %b.coerce) #0 { 21158// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 21159// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 21160// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 21161// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]* 21162// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21163// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 21164// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 21165// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21166// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 21167// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21168// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0 21169// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x 
half>* [[ARRAYIDX]], align 8 21170// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 21171// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21172// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1 21173// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 21174// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 21175// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21176// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2 21177// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 21178// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8> 21179// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21180// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3 21181// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 21182// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8> 21183// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21184// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21185// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21186// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21187// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2) 21188// CHECK: ret void 21189void test_vst4_f16(float16_t * a, float16x4x4_t b) { 21190 vst4_f16(a, b); 21191} 21192 21193// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x i64] %b.coerce) #0 { 21194// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 21195// CHECK: 
[[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 21196// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 21197// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]* 21198// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21199// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 21200// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 21201// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21202// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 21203// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21204// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0 21205// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 21206// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 21207// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21208// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1 21209// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 21210// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 21211// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21212// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2 21213// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 21214// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 21215// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21216// CHECK: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3 21217// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 21218// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8> 21219// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 21220// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 21221// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 21222// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float> 21223// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4) 21224// CHECK: ret void 21225void test_vst4_f32(float32_t * a, float32x2x4_t b) { 21226 vst4_f32(a, b); 21227} 21228 21229// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x i64] %b.coerce) #0 { 21230// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 21231// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 21232// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 21233// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 21234// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21235// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 21236// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 21237// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21238// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21239// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 21240// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 21241// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21242// CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1 21243// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 21244// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21245// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2 21246// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 21247// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21248// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 21249// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 21250// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1) 21251// CHECK: ret void 21252void test_vst4_p8(poly8_t * a, poly8x8x4_t b) { 21253 vst4_p8(a, b); 21254} 21255 21256// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x i64] %b.coerce) #0 { 21257// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 21258// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 21259// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 21260// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 21261// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21262// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 21263// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 21264// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21265// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21266// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21267// CHECK: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 21268// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 21269// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 21270// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21271// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1 21272// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 21273// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 21274// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21275// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2 21276// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 21277// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 21278// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21279// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 21280// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 21281// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 21282// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21283// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21284// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21285// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21286// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2) 21287// CHECK: ret void 21288void test_vst4_p16(poly16_t * a, poly16x4x4_t b) { 21289 vst4_p16(a, b); 21290} 21291 21292 21293// CHECK-LABEL: define void 
@test_vst4q_lane_u16(i16* %a, [8 x i64] %b.coerce) #0 { 21294// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 21295// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 21296// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 21297// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 21298// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 21299// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* 21300// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* 21301// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 21302// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21303// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 21304// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 21305// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 21306// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 21307// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 21308// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 21309// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 21310// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 21311// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 21312// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 21313// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 21314// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 21315// CHECK: [[VAL5:%.*]] = getelementptr inbounds 
%struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 21316// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 21317// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 21318// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 21319// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 21320// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 21321// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 21322// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 21323// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2) 21324// CHECK: ret void 21325void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) { 21326 vst4q_lane_u16(a, b, 7); 21327} 21328 21329// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [8 x i64] %b.coerce) #0 { 21330// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 21331// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 21332// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 21333// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* 21334// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 21335// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* 21336// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* 21337// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 21338// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 21339// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 21340// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 21341// CHECK: [[TMP4:%.*]] = 
load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 21342// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 21343// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 21344// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 21345// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 21346// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 21347// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 21348// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 21349// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 21350// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 21351// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 21352// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 21353// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 21354// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 21355// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 21356// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 21357// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 21358// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 21359// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4) 21360// CHECK: ret void 21361void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) { 21362 vst4q_lane_u32(a, b, 3); 21363} 21364 21365// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [8 x i64] %b.coerce) #0 { 21366// CHECK: [[B:%.*]] = alloca 
%struct.int16x8x4_t, align 16 21367// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 21368// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 21369// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 21370// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 21371// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* 21372// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* 21373// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 21374// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21375// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 21376// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 21377// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 21378// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 21379// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 21380// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 21381// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 21382// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 21383// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 21384// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 21385// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 21386// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 21387// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 21388// CHECK: [[ARRAYIDX6:%.*]] = getelementptr 
inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 21389// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 21390// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 21391// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 21392// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 21393// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 21394// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 21395// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2) 21396// CHECK: ret void 21397void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) { 21398 vst4q_lane_s16(a, b, 7); 21399} 21400 21401// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [8 x i64] %b.coerce) #0 { 21402// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 21403// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 21404// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 21405// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* 21406// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 21407// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* 21408// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* 21409// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 21410// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 21411// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 21412// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 21413// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 21414// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 
21415// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 21416// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 21417// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 21418// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 21419// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 21420// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 21421// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 21422// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 21423// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 21424// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 21425// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 21426// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 21427// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 21428// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 21429// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 21430// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 21431// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4) 21432// CHECK: ret void 21433void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) { 21434 vst4q_lane_s32(a, b, 3); 21435} 21436 21437// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [8 x i64] %b.coerce) #0 { 21438// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 21439// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 21440// CHECK: [[COERCE_DIVE:%.*]] 
= getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 21441// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]* 21442// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 21443// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* 21444// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* 21445// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 21446// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 21447// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 21448// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 21449// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 21450// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 21451// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 21452// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1 21453// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 21454// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 21455// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 21456// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2 21457// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 21458// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 21459// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 21460// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3 21461// CHECK: 
[[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 21462// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8> 21463// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 21464// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 21465// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 21466// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 21467// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2) 21468// CHECK: ret void 21469void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) { 21470 vst4q_lane_f16(a, b, 7); 21471} 21472 21473// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [8 x i64] %b.coerce) #0 { 21474// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 21475// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 21476// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 21477// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]* 21478// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 21479// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* 21480// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* 21481// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 21482// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 21483// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 21484// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0 21485// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 21486// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 21487// CHECK: [[VAL1:%.*]] = getelementptr 
inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 21488// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1 21489// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 21490// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 21491// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 21492// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2 21493// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 21494// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> 21495// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 21496// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3 21497// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 21498// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8> 21499// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 21500// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 21501// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> 21502// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float> 21503// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4) 21504// CHECK: ret void 21505void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) { 21506 vst4q_lane_f32(a, b, 3); 21507} 21508 21509// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [8 x i64] %b.coerce) #0 { 21510// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 21511// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 21512// CHECK: 
[[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 21513// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 21514// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 21515// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* 21516// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* 21517// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 21518// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21519// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 21520// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 21521// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 21522// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 21523// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 21524// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 21525// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 21526// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 21527// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 21528// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 21529// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 21530// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 21531// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 21532// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 21533// CHECK: [[TMP10:%.*]] = 
load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 21534// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 21535// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 21536// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 21537// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 21538// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 21539// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2) 21540// CHECK: ret void 21541void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) { 21542 vst4q_lane_p16(a, b, 7); 21543} 21544 21545// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x i64] %b.coerce) #0 { 21546// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 21547// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 21548// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 21549// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 21550// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21551// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* 21552// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* 21553// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21554// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 21555// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 21556// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 21557// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 21558// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1 
21559// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 21560// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 21561// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2 21562// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 21563// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 21564// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 21565// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 21566// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) 21567// CHECK: ret void 21568void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) { 21569 vst4_lane_u8(a, b, 7); 21570} 21571 21572// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 { 21573// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 21574// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 21575// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 21576// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 21577// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21578// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* 21579// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* 21580// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21581// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21582// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 21583// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* 
[[VAL]], i32 0, i32 0 21584// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 21585// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 21586// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 21587// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1 21588// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 21589// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 21590// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 21591// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2 21592// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 21593// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 21594// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 21595// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 21596// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 21597// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 21598// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21599// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21600// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21601// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21602// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2) 21603// CHECK: ret void 21604void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) { 21605 vst4_lane_u16(a, b, 3); 21606} 21607 21608// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 { 
21609// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 21610// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 21611// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 21612// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 21613// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21614// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* 21615// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* 21616// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21617// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 21618// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 21619// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 21620// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 21621// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 21622// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 21623// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1 21624// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 21625// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 21626// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 21627// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2 21628// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 21629// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 21630// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 21631// 
CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3 21632// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 21633// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 21634// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 21635// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 21636// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 21637// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 21638// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4) 21639// CHECK: ret void 21640void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) { 21641 vst4_lane_u32(a, b, 1); 21642} 21643 21644// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x i64] %b.coerce) #0 { 21645// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 21646// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 21647// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 21648// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 21649// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21650// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* 21651// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* 21652// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21653// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 21654// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 21655// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 21656// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 
21657// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1 21658// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 21659// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 21660// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2 21661// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 21662// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 21663// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 21664// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 21665// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) 21666// CHECK: ret void 21667void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) { 21668 vst4_lane_s8(a, b, 7); 21669} 21670 21671// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 { 21672// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 21673// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 21674// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 21675// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 21676// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21677// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* 21678// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* 21679// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21680// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21681// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, 
i32 0 21682// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 21683// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 21684// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 21685// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21686// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1 21687// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 21688// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 21689// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21690// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2 21691// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 21692// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 21693// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21694// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 21695// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 21696// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 21697// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21698// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21699// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21700// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21701// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2) 21702// CHECK: ret void 21703void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) { 21704 vst4_lane_s16(a, b, 3); 21705} 21706 
21707// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 { 21708// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 21709// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 21710// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 21711// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 21712// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21713// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* 21714// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 21715// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21716// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 21717// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21718// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 21719// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 21720// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 21721// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21722// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1 21723// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 21724// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 21725// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21726// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2 21727// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 21728// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 21729// CHECK: [[VAL5:%.*]] = getelementptr inbounds 
%struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21730// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3 21731// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 21732// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 21733// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 21734// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 21735// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 21736// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 21737// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4) 21738// CHECK: ret void 21739void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) { 21740 vst4_lane_s32(a, b, 1); 21741} 21742 21743// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x i64] %b.coerce) #0 { 21744// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 21745// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 21746// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 21747// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]* 21748// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21749// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 21750// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 21751// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21752// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 21753// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21754// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0 21755// CHECK: [[TMP4:%.*]] = load 
<4 x half>, <4 x half>* [[ARRAYIDX]], align 8 21756// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 21757// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21758// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1 21759// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 21760// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 21761// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21762// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2 21763// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 21764// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8> 21765// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21766// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3 21767// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 21768// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8> 21769// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21770// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21771// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21772// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21773// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2) 21774// CHECK: ret void 21775void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) { 21776 vst4_lane_f16(a, b, 3); 21777} 21778 21779// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x i64] %b.coerce) #0 { 21780// CHECK: [[B:%.*]] = alloca 
%struct.float32x2x4_t, align 8 21781// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 21782// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 21783// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]* 21784// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21785// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 21786// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 21787// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21788// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 21789// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21790// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0 21791// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 21792// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 21793// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21794// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1 21795// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 21796// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 21797// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21798// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2 21799// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 21800// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 21801// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, 
i32 0 21802// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3 21803// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 21804// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8> 21805// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 21806// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 21807// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 21808// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float> 21809// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4) 21810// CHECK: ret void 21811void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) { 21812 vst4_lane_f32(a, b, 1); 21813} 21814 21815// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x i64] %b.coerce) #0 { 21816// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 21817// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 21818// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 21819// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 21820// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21821// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 21822// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 21823// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21824// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21825// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 21826// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 21827// CHECK: [[VAL1:%.*]] = getelementptr inbounds 
%struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21828// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1 21829// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 21830// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21831// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2 21832// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 21833// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21834// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 21835// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 21836// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) 21837// CHECK: ret void 21838void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) { 21839 vst4_lane_p8(a, b, 7); 21840} 21841 21842// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 { 21843// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 21844// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 21845// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 21846// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 21847// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21848// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 21849// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 21850// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21851// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21852// CHECK: [[VAL:%.*]] = 
getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21853// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 21854// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 21855// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 21856// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21857// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1 21858// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 21859// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 21860// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21861// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2 21862// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 21863// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 21864// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21865// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 21866// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 21867// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 21868// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21869// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21870// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21871// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21872// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2) 21873// CHECK: ret void 21874void 
test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) { 21875 vst4_lane_p16(a, b, 3); 21876} 21877 21878 21879// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { 21880// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b 21881// CHECK: ret <8 x i8> [[SUB_I]] 21882int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) { 21883 return vsub_s8(a, b); 21884} 21885 21886// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { 21887// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b 21888// CHECK: ret <4 x i16> [[SUB_I]] 21889int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) { 21890 return vsub_s16(a, b); 21891} 21892 21893// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { 21894// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b 21895// CHECK: ret <2 x i32> [[SUB_I]] 21896int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) { 21897 return vsub_s32(a, b); 21898} 21899 21900// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %a, <1 x i64> %b) #0 { 21901// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b 21902// CHECK: ret <1 x i64> [[SUB_I]] 21903int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) { 21904 return vsub_s64(a, b); 21905} 21906 21907// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %a, <2 x float> %b) #0 { 21908// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b 21909// CHECK: ret <2 x float> [[SUB_I]] 21910float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) { 21911 return vsub_f32(a, b); 21912} 21913 21914// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { 21915// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b 21916// CHECK: ret <8 x i8> [[SUB_I]] 21917uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) { 21918 return vsub_u8(a, b); 21919} 21920 21921// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { 21922// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b 21923// CHECK: ret <4 x i16> [[SUB_I]] 21924uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) { 21925 
return vsub_u16(a, b); 21926} 21927 21928// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { 21929// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b 21930// CHECK: ret <2 x i32> [[SUB_I]] 21931uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) { 21932 return vsub_u32(a, b); 21933} 21934 21935// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %a, <1 x i64> %b) #0 { 21936// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b 21937// CHECK: ret <1 x i64> [[SUB_I]] 21938uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) { 21939 return vsub_u64(a, b); 21940} 21941 21942// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 21943// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b 21944// CHECK: ret <16 x i8> [[SUB_I]] 21945int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) { 21946 return vsubq_s8(a, b); 21947} 21948 21949// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 21950// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b 21951// CHECK: ret <8 x i16> [[SUB_I]] 21952int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) { 21953 return vsubq_s16(a, b); 21954} 21955 21956// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 21957// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b 21958// CHECK: ret <4 x i32> [[SUB_I]] 21959int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) { 21960 return vsubq_s32(a, b); 21961} 21962 21963// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 21964// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b 21965// CHECK: ret <2 x i64> [[SUB_I]] 21966int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) { 21967 return vsubq_s64(a, b); 21968} 21969 21970// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) #0 { 21971// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b 21972// CHECK: ret <4 x float> [[SUB_I]] 21973float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) { 21974 return vsubq_f32(a, b); 21975} 
21976 21977// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 21978// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b 21979// CHECK: ret <16 x i8> [[SUB_I]] 21980uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) { 21981 return vsubq_u8(a, b); 21982} 21983 21984// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 21985// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b 21986// CHECK: ret <8 x i16> [[SUB_I]] 21987uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) { 21988 return vsubq_u16(a, b); 21989} 21990 21991// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 21992// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b 21993// CHECK: ret <4 x i32> [[SUB_I]] 21994uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) { 21995 return vsubq_u32(a, b); 21996} 21997 21998// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 21999// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b 22000// CHECK: ret <2 x i64> [[SUB_I]] 22001uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) { 22002 return vsubq_u64(a, b); 22003} 22004 22005 22006// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { 22007// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 22008// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 22009// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 22010// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 22011// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 22012// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 22013// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 22014// CHECK: ret <8 x i8> [[VSUBHN2_I]] 22015int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { 22016 return vsubhn_s16(a, b); 22017} 22018 22019// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { 22020// 
CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 22021// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 22022// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 22023// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 22024// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 22025// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16> 22026// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 22027// CHECK: ret <4 x i16> [[VSUBHN2_I]] 22028int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { 22029 return vsubhn_s32(a, b); 22030} 22031 22032// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 22033// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 22034// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 22035// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 22036// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 22037// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 22038// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32> 22039// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> 22040// CHECK: ret <2 x i32> [[VSUBHN2_I]] 22041int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { 22042 return vsubhn_s64(a, b); 22043} 22044 22045// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { 22046// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 22047// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 22048// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 22049// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 22050// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 22051// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 22052// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 
22053// CHECK: ret <8 x i8> [[VSUBHN2_I]] 22054uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { 22055 return vsubhn_u16(a, b); 22056} 22057 22058// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 22059// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 22060// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 22061// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 22062// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 22063// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 22064// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16> 22065// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 22066// CHECK: ret <4 x i16> [[VSUBHN2_I]] 22067uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { 22068 return vsubhn_u32(a, b); 22069} 22070 22071// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 22072// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 22073// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 22074// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 22075// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 22076// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 22077// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32> 22078// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> 22079// CHECK: ret <2 x i32> [[VSUBHN2_I]] 22080uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { 22081 return vsubhn_u64(a, b); 22082} 22083 22084 22085// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 22086// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> 22087// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> 22088// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22089// CHECK: ret <8 x i16> [[SUB_I]] 22090int16x8_t test_vsubl_s8(int8x8_t 
a, int8x8_t b) { 22091 return vsubl_s8(a, b); 22092} 22093 22094// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 22095// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22096// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22097// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 22098// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22099// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22100// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> 22101// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22102// CHECK: ret <4 x i32> [[SUB_I]] 22103int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { 22104 return vsubl_s16(a, b); 22105} 22106 22107// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 22108// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22109// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22110// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 22111// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22112// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22113// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> 22114// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22115// CHECK: ret <2 x i64> [[SUB_I]] 22116int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { 22117 return vsubl_s32(a, b); 22118} 22119 22120// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 22121// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> 22122// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> 22123// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22124// CHECK: ret <8 x i16> [[SUB_I]] 22125uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { 22126 return vsubl_u8(a, b); 22127} 22128 22129// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 
22130// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22131// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22132// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 22133// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22134// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22135// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 22136// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22137// CHECK: ret <4 x i32> [[SUB_I]] 22138uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { 22139 return vsubl_u16(a, b); 22140} 22141 22142// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 22143// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22144// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22145// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 22146// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22147// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22148// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 22149// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22150// CHECK: ret <2 x i64> [[SUB_I]] 22151uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { 22152 return vsubl_u32(a, b); 22153} 22154 22155 22156// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 { 22157// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> 22158// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] 22159// CHECK: ret <8 x i16> [[SUB_I]] 22160int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { 22161 return vsubw_s8(a, b); 22162} 22163 22164// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 { 22165// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22166// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22167// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 22168// 
CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] 22169// CHECK: ret <4 x i32> [[SUB_I]] 22170int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { 22171 return vsubw_s16(a, b); 22172} 22173 22174// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 { 22175// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22176// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22177// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 22178// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] 22179// CHECK: ret <2 x i64> [[SUB_I]] 22180int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { 22181 return vsubw_s32(a, b); 22182} 22183 22184// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 { 22185// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> 22186// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] 22187// CHECK: ret <8 x i16> [[SUB_I]] 22188uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { 22189 return vsubw_u8(a, b); 22190} 22191 22192// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 { 22193// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22194// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22195// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 22196// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] 22197// CHECK: ret <4 x i32> [[SUB_I]] 22198uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { 22199 return vsubw_u16(a, b); 22200} 22201 22202// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 { 22203// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22204// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22205// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 22206// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] 22207// CHECK: ret <2 x i64> [[SUB_I]] 22208uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { 22209 
  return vsubw_u32(a, b);
}


// NOTE(review): The exhaustive CHECK lines in this file appear to be
// machine-generated from clang's emitted IR — regenerate them with the
// test-update tooling rather than editing by hand if codegen changes.

// vtbl1: single-register table lookup, lowered to @llvm.arm.neon.vtbl1.
// CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL1_I]]
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
  return vtbl1_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL1_I]]
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
  return vtbl1_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL1_I]]
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
  return vtbl1_p8(a, b);
}


// vtbl2: the 2-register table arrives as [2 x i64] %a.coerce (APCS struct
// coercion); the CHECK lines track the store/load round-trip through the
// local allocas before the two <8 x i8> table halves feed the intrinsic.
// CHECK-LABEL: define <8 x i8> @test_vtbl2_u8([2 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL2_I]]
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  return vtbl2_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl2_s8([2 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL2_I]]
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  return vtbl2_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl2_p8([2 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL2_I]]
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  return vtbl2_p8(a, b);
}


// vtbl3: 3-register table, coerced as [3 x i64]; three <8 x i8> halves are
// extracted from the local copy and passed to @llvm.arm.neon.vtbl3.
// NOTE(review): CHECK lines look machine-generated; prefer regenerating over
// hand-editing.
// CHECK-LABEL: define <8 x i8> @test_vtbl3_u8([3 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL3_I]]
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  return vtbl3_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl3_s8([3 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL3_I]]
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  return vtbl3_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl3_p8([3 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL3_I]]
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  return vtbl3_p8(a, b);
}


// vtbl4: 4-register table, coerced as [4 x i64]; four <8 x i8> halves feed
// @llvm.arm.neon.vtbl4.
// CHECK-LABEL: define <8 x i8> @test_vtbl4_u8([4 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL4_I]]
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  return vtbl4_u8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl4_s8([4 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL4_I]]
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  return vtbl4_s8(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtbl4_p8([4 x i64] %a.coerce, <8 x i8> %b) #0 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VTBL4_I]]
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  return vtbl4_p8(a, b);
}


// vtbx1: table-lookup-with-extension; the IR passes the fallback vector %a
// as the first operand of @llvm.arm.neon.vtbx1.
// CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX1_I]]
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vtbx1_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX1_I]]
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vtbx1_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX1_I]]
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  return vtbx1_p8(a, b, c);
}


// vtbx2: the 2-register table argument %b arrives coerced as [2 x i64];
// %a (fallback) and %c (indices) stay in registers.
// CHECK-LABEL: define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX2_I]]
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  return vtbx2_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX2_I]]
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  return vtbx2_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX2_I]]
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  return vtbx2_p8(a, b, c);
}


// vtbx3: 3-register table coerced as [3 x i64]; three table halves plus
// fallback %a and index vector %c feed @llvm.arm.neon.vtbx3.
// CHECK-LABEL: define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX3_I]]
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX3_I]]
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  return vtbx3_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VTBX3_I]]
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  return vtbx3_p8(a, b, c);
}


// CHECK-LABEL: define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
22679// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 22680// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22681// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22682// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 22683// CHECK: ret <8 x i8> [[VTBX4_I]] 22684uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { 22685 return vtbx4_u8(a, b, c); 22686} 22687 22688// CHECK-LABEL: define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { 22689// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8 22690// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 22691// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 22692// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22693// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 22694// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 22695// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]* 22696// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22697// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22698// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22699// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22700// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22701// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22702// CHECK: [[TMP4:%.*]] = load <8 x 
i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22703// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22704// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22705// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22706// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22707// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22708// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22709// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22710// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22711// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22712// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 22713// CHECK: ret <8 x i8> [[VTBX4_I]] 22714int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { 22715 return vtbx4_s8(a, b, c); 22716} 22717 22718// CHECK-LABEL: define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { 22719// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8 22720// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 22721// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 22722// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22723// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 22724// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 22725// CHECK: [[TMP1:%.*]] = bitcast [4 x <8 
x i8>]* [[COERCE_DIVE1]] to [4 x i64]* 22726// CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22727// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22728// CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22729// CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22730// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22731// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22732// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22733// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22734// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22735// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22736// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22737// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22738// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22739// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22740// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22741// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22742// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 22743// CHECK: ret <8 x i8> [[VTBX4_I]] 22744poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { 22745 return vtbx4_p8(a, b, c); 22746} 
22747 22748 22749// CHECK-LABEL: define void @test_vtrn_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 22750// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 22751// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* 22752// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 22753// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22754// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] 22755// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 22756// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22757// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] 22758// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* 22759// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* 22760// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 22761// CHECK: ret void 22762int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { 22763 return vtrn_s8(a, b); 22764} 22765 22766// CHECK-LABEL: define void @test_vtrn_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 22767// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 22768// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* 22769// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22770// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22771// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 22772// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 22773// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22774// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22775// CHECK: store <4 x i16> [[VTRN_I]], <4 
x i16>* [[TMP3]] 22776// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 22777// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 22778// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] 22779// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* 22780// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* 22781// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22782// CHECK: ret void 22783int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { 22784 return vtrn_s16(a, b); 22785} 22786 22787// CHECK-LABEL: define void @test_vtrn_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { 22788// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 22789// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* 22790// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22791// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22792// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 22793// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 22794// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22795// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2> 22796// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]] 22797// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 22798// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3> 22799// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]] 22800// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* 22801// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* 22802// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 
22803// CHECK: ret void 22804int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { 22805 return vtrn_s32(a, b); 22806} 22807 22808// CHECK-LABEL: define void @test_vtrn_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 22809// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 22810// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* 22811// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 22812// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22813// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] 22814// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 22815// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22816// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] 22817// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* 22818// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* 22819// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 22820// CHECK: ret void 22821uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { 22822 return vtrn_u8(a, b); 22823} 22824 22825// CHECK-LABEL: define void @test_vtrn_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 22826// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 22827// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* 22828// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22829// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22830// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 22831// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 22832// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22833// CHECK: [[VTRN_I:%.*]] = shufflevector 
<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22834// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] 22835// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 22836// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 22837// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] 22838// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* 22839// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* 22840// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22841// CHECK: ret void 22842uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { 22843 return vtrn_u16(a, b); 22844} 22845 22846// CHECK-LABEL: define void @test_vtrn_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { 22847// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 22848// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* 22849// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22850// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22851// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 22852// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 22853// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22854// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2> 22855// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]] 22856// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 22857// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3> 22858// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]] 22859// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* 22860// CHECK: [[TMP8:%.*]] = bitcast 
%struct.uint32x2x2_t* [[__RET_I]] to i8* 22861// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22862// CHECK: ret void 22863uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { 22864 return vtrn_u32(a, b); 22865} 22866 22867// CHECK-LABEL: define void @test_vtrn_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 { 22868// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 22869// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* 22870// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> 22871// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> 22872// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* 22873// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 22874// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 22875// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2> 22876// CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]] 22877// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 22878// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3> 22879// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]] 22880// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* 22881// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* 22882// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22883// CHECK: ret void 22884float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { 22885 return vtrn_f32(a, b); 22886} 22887 22888// CHECK-LABEL: define void @test_vtrn_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 22889// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 22890// CHECK: [[TMP0:%.*]] = bitcast 
%struct.poly8x8x2_t* [[__RET_I]] to i8* 22891// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 22892// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22893// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] 22894// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 22895// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22896// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] 22897// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* 22898// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* 22899// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 22900// CHECK: ret void 22901poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { 22902 return vtrn_p8(a, b); 22903} 22904 22905// CHECK-LABEL: define void @test_vtrn_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 22906// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 22907// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* 22908// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22909// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22910// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 22911// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 22912// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22913// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22914// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] 22915// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 22916// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 
22917// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] 22918// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* 22919// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* 22920// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22921// CHECK: ret void 22922poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { 22923 return vtrn_p16(a, b); 22924} 22925 22926// CHECK-LABEL: define void @test_vtrnq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 22927// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 22928// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* 22929// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 22930// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> 22931// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] 22932// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 22933// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> 22934// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] 22935// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* 22936// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* 22937// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 22938// CHECK: ret void 22939int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { 22940 return vtrnq_s8(a, b); 22941} 22942 22943// CHECK-LABEL: define void @test_vtrnq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 22944// CHECK: [[__RET_I:%.*]] = alloca 
%struct.int16x8x2_t, align 16 22945// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* 22946// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 22947// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 22948// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 22949// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 22950// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 22951// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22952// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] 22953// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 22954// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22955// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] 22956// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* 22957// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* 22958// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 22959// CHECK: ret void 22960int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { 22961 return vtrnq_s16(a, b); 22962} 22963 22964// CHECK-LABEL: define void @test_vtrnq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { 22965// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 22966// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* 22967// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> 22968// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> 22969// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 22970// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 22971// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 22972// CHECK: 
[[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22973// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]] 22974// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 22975// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 22976// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]] 22977// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* 22978// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* 22979// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 22980// CHECK: ret void 22981int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { 22982 return vtrnq_s32(a, b); 22983} 22984 22985// CHECK-LABEL: define void @test_vtrnq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 22986// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 22987// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* 22988// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 22989// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> 22990// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] 22991// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 22992// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> 22993// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] 22994// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* 22995// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* 22996// CHECK: 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 22997// CHECK: ret void 22998uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { 22999 return vtrnq_u8(a, b); 23000} 23001 23002// CHECK-LABEL: define void @test_vtrnq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 23003// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 23004// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* 23005// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23006// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23007// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 23008// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23009// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 23010// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 23011// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] 23012// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 23013// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 23014// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] 23015// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* 23016// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* 23017// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23018// CHECK: ret void 23019uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { 23020 return vtrnq_u16(a, b); 23021} 23022 23023// CHECK-LABEL: define void @test_vtrnq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { 23024// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 23025// CHECK: [[TMP0:%.*]] = 
bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* 23026// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> 23027// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> 23028// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 23029// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 23030// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 23031// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 23032// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]] 23033// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 23034// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 23035// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]] 23036// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* 23037// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* 23038// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23039// CHECK: ret void 23040uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { 23041 return vtrnq_u32(a, b); 23042} 23043 23044// CHECK-LABEL: define void @test_vtrnq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 { 23045// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 23046// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* 23047// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> 23048// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> 23049// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* 23050// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 23051// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 23052// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, 
i32 6> 23053// CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]] 23054// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 23055// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 23056// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]] 23057// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* 23058// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* 23059// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23060// CHECK: ret void 23061float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { 23062 return vtrnq_f32(a, b); 23063} 23064 23065// CHECK-LABEL: define void @test_vtrnq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 23066// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 23067// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* 23068// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 23069// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> 23070// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] 23071// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 23072// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> 23073// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] 23074// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* 23075// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* 23076// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 
16, i1 false) #4
// CHECK: ret void
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
  return vtrnq_p8(a, b);
}

// CHECK-LABEL: define void @test_vtrnq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
  return vtrnq_p16(a, b);
}


// NOTE(review): vtst wrappers below pin the lowering of vtst to and + icmp ne + sext.
// The CHECK lines appear machine-generated; regenerate them rather than hand-editing — TODO confirm the generator script.
// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
  return vtst_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
  return vtst_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
  return vtst_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
  return vtst_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
  return vtst_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
  return vtst_u32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
  return vtst_p8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
  return vtst_p16(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
  return vtstq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
  return vtstq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
  return vtstq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
  return vtstq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
  return vtstq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
  return vtstq_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
  return vtstq_p8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
  return vtstq_p16(a, b);
}


// CHECK-LABEL: define void @test_vuzp_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
//
CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
  return vuzp_s8(a, b);
}

// NOTE(review): vuzp wrappers return a two-vector struct via sret: both de-interleaved
// halves are stored through [[__RET_I]] and then memcpy'd into %agg.result.
// CHECK-LABEL: define void @test_vuzp_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
  return vuzp_s16(a, b);
}

// CHECK-LABEL: define void @test_vuzp_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
  return vuzp_s32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp_u8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp_u16(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp_u32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
  return vuzp_f32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp_p8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp_p16(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
//
CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}

// NOTE(review): q-register vuzp variants — same sret pattern as the d-register ones,
// but with 16-byte alignment and a 32-byte result memcpy.
// CHECK-LABEL: define void @test_vuzpq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
  return vuzpq_f32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
  return vuzpq_p8(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
  return vuzpq_p16(a, b);
}


// CHECK-LABEL: define void @test_vzip_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// NOTE(review): Auto-generated FileCheck coverage for the 64-bit vzip_* intrinsics (interleave low/high lanes of two d-registers into a two-vector struct). CHECK: lines below are test assertions; do not hand-edit — regenerate with the test-update script.
23656// CHECK: ret void 23657int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { 23658 return vzip_s8(a, b); 23659} 23660 23661// CHECK-LABEL: define void @test_vzip_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 23662// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 23663// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* 23664// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 23665// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 23666// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 23667// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 23668// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 23669// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> 23670// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]] 23671// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 23672// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> 23673// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]] 23674// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* 23675// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* 23676// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 23677// CHECK: ret void 23678int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { 23679 return vzip_s16(a, b); 23680} 23681 23682// CHECK-LABEL: define void @test_vzip_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { 23683// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 23684// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* 23685// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> 23686// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 23687// CHECK: 
[[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 23688// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 23689// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 23690// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2> 23691// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]] 23692// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 23693// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3> 23694// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]] 23695// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* 23696// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* 23697// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 23698// CHECK: ret void 23699int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { 23700 return vzip_s32(a, b); 23701} 23702 23703// CHECK-LABEL: define void @test_vzip_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 23704// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 23705// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* 23706// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 23707// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 23708// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] 23709// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 23710// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 23711// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] 23712// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* 23713// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] 
to i8* 23714// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 23715// CHECK: ret void 23716uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { 23717 return vzip_u8(a, b); 23718} 23719 23720// CHECK-LABEL: define void @test_vzip_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 23721// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 23722// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* 23723// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 23724// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 23725// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 23726// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 23727// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 23728// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> 23729// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]] 23730// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 23731// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> 23732// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]] 23733// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* 23734// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* 23735// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 23736// CHECK: ret void 23737uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { 23738 return vzip_u16(a, b); 23739} 23740 23741// CHECK-LABEL: define void @test_vzip_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { 23742// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 23743// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* 23744// 
CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> 23745// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 23746// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 23747// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 23748// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 23749// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2> 23750// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]] 23751// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 23752// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3> 23753// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]] 23754// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* 23755// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* 23756// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 23757// CHECK: ret void 23758uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { 23759 return vzip_u32(a, b); 23760} 23761 23762// CHECK-LABEL: define void @test_vzip_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 { 23763// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 23764// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* 23765// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> 23766// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> 23767// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* 23768// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 23769// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 23770// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2> 23771// CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]] 23772// CHECK: [[TMP6:%.*]] = 
getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 23773// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3> 23774// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]] 23775// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* 23776// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* 23777// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 23778// CHECK: ret void 23779float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { 23780 return vzip_f32(a, b); 23781} 23782 23783// CHECK-LABEL: define void @test_vzip_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 23784// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 23785// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* 23786// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 23787// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 23788// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] 23789// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 23790// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 23791// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] 23792// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* 23793// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* 23794// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 23795// CHECK: ret void 23796poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { 23797 return vzip_p8(a, b); 23798} 23799 23800// CHECK-LABEL: define void @test_vzip_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 
23801// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 23802// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* 23803// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 23804// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 23805// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 23806// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 23807// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 23808// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> 23809// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]] 23810// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 23811// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> 23812// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]] 23813// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* 23814// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* 23815// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 23816// CHECK: ret void 23817poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { 23818 return vzip_p16(a, b); 23819} 23820 23821// CHECK-LABEL: define void @test_vzipq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 23822// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 23823// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* 23824// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 23825// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 23826// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] 23827// CHECK: [[TMP2:%.*]] = getelementptr 
inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 23828// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 23829// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] 23830// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* 23831// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* 23832// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 23833// CHECK: ret void 23834int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { 23835 return vzipq_s8(a, b); 23836} 23837 23838// CHECK-LABEL: define void @test_vzipq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 23839// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 23840// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* 23841// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23842// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23843// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 23844// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23845// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 23846// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 23847// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]] 23848// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 23849// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 23850// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]] 23851// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* 23852// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* 
[[__RET_I]] to i8* 23853// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23854// CHECK: ret void 23855int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { 23856 return vzipq_s16(a, b); 23857} 23858 23859// CHECK-LABEL: define void @test_vzipq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { 23860// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 23861// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* 23862// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> 23863// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> 23864// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 23865// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 23866// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 23867// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> 23868// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]] 23869// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 23870// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> 23871// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]] 23872// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* 23873// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* 23874// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23875// CHECK: ret void 23876int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { 23877 return vzipq_s32(a, b); 23878} 23879 23880// CHECK-LABEL: define void @test_vzipq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 23881// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 23882// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] 
to i8* 23883// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 23884// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 23885// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] 23886// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 23887// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 23888// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] 23889// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* 23890// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* 23891// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 23892// CHECK: ret void 23893uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { 23894 return vzipq_u8(a, b); 23895} 23896 23897// CHECK-LABEL: define void @test_vzipq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 23898// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 23899// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* 23900// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23901// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23902// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 23903// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23904// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 23905// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 23906// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]] 23907// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, 
<8 x i16>* [[TMP3]], i32 1 23908// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 23909// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]] 23910// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* 23911// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* 23912// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23913// CHECK: ret void 23914uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { 23915 return vzipq_u16(a, b); 23916} 23917 23918// CHECK-LABEL: define void @test_vzipq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { 23919// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 23920// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* 23921// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> 23922// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> 23923// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 23924// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 23925// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 23926// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> 23927// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]] 23928// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 23929// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> 23930// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]] 23931// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* 23932// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* 23933// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 
23934// CHECK: ret void 23935uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { 23936 return vzipq_u32(a, b); 23937} 23938 23939// CHECK-LABEL: define void @test_vzipq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 { 23940// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 23941// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* 23942// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> 23943// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> 23944// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* 23945// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 23946// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 23947// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> 23948// CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]] 23949// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 23950// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> 23951// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]] 23952// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* 23953// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* 23954// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23955// CHECK: ret void 23956float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { 23957 return vzipq_f32(a, b); 23958} 23959 23960// CHECK-LABEL: define void @test_vzipq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 23961// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 23962// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* 23963// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 
23964// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 23965// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] 23966// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 23967// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 23968// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] 23969// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* 23970// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* 23971// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 23972// CHECK: ret void 23973poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { 23974 return vzipq_p8(a, b); 23975} 23976 23977// CHECK-LABEL: define void @test_vzipq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 23978// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 23979// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* 23980// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23981// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23982// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 23983// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23984// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 23985// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 23986// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]] 23987// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 23988// CHECK: [[VZIP1_I:%.*]] = 
shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 23989// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]] 23990// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* 23991// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* 23992// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23993// CHECK: ret void 23994poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { 23995 return vzipq_p16(a, b); 23996} 23997 23998 23999
// NOTE(review): End of the auto-generated vzipq_* FileCheck tests. The two-struct results are returned via the sret pointer and copied out with llvm.memcpy, matching the APCS-GNU ABI selected in the RUN line; keep CHECK lines in sync by regenerating, not hand-editing.