1// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s 2 3// Test new aarch64 intrinsics and types 4 5#include <arm_neon.h> 6 7// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 8// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 9// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 10// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 11// CHECK: ret <4 x i16> [[ADD]] 12int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) { 13 return vmla_lane_s16(a, b, v, 3); 14} 15 16// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 17// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 18// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 19// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 20// CHECK: ret <8 x i16> [[ADD]] 21int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) { 22 return vmlaq_lane_s16(a, b, v, 3); 23} 24 25// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 26// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 27// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 28// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 29// CHECK: ret <2 x i32> [[ADD]] 30int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { 31 return vmla_lane_s32(a, b, v, 1); 32} 33 34// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 35// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 36// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 37// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 38// CHECK: ret <4 x i32> [[ADD]] 39int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { 40 return vmlaq_lane_s32(a, b, v, 1); 41} 42 43// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 44// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 45// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 46// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 47// CHECK: ret <4 x i16> [[ADD]] 48int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { 49 return vmla_laneq_s16(a, b, v, 7); 50} 51 52// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 53// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 54// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 55// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 56// CHECK: ret <8 x i16> [[ADD]] 57int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { 58 return vmlaq_laneq_s16(a, b, v, 7); 59} 60 61// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 62// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 63// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 64// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 65// CHECK: ret <2 x i32> [[ADD]] 66int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { 67 return vmla_laneq_s32(a, b, v, 3); 68} 69 70// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 71// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 72// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 73// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 74// CHECK: ret <4 x i32> [[ADD]] 75int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { 76 return vmlaq_laneq_s32(a, b, v, 3); 77} 78 79// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 80// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 81// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 82// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 83// CHECK: ret <4 x i16> [[SUB]] 84int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) { 85 return vmls_lane_s16(a, b, v, 3); 86} 87 88// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 89// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 90// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 91// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 92// CHECK: ret <8 x i16> [[SUB]] 93int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) { 94 return vmlsq_lane_s16(a, b, v, 3); 95} 96 97// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 98// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 99// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 100// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 101// CHECK: ret <2 x i32> [[SUB]] 102int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { 103 return vmls_lane_s32(a, b, v, 1); 104} 105 106// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 107// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 108// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 109// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 110// CHECK: ret <4 x i32> [[SUB]] 111int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { 112 return vmlsq_lane_s32(a, b, v, 1); 113} 114 115// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 116// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 117// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 118// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 119// CHECK: ret <4 x i16> [[SUB]] 120int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { 121 return vmls_laneq_s16(a, b, v, 7); 122} 123 124// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 125// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 126// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 127// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 128// CHECK: ret <8 x i16> [[SUB]] 129int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { 130 return vmlsq_laneq_s16(a, b, v, 7); 131} 132 133// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 134// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 135// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 136// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 137// CHECK: ret <2 x i32> [[SUB]] 138int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { 139 return vmls_laneq_s32(a, b, v, 3); 140} 141 142// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 143// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 144// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 145// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 146// CHECK: ret <4 x i32> [[SUB]] 147int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { 148 return vmlsq_laneq_s32(a, b, v, 3); 149} 150 151// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 { 152// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 153// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 154// CHECK: ret <4 x i16> [[MUL]] 155int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) { 156 return vmul_lane_s16(a, v, 3); 157} 158 159// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 { 160// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 161// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 162// CHECK: ret <8 x i16> [[MUL]] 163int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) { 164 return vmulq_lane_s16(a, v, 3); 165} 166 167// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 { 168// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 169// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 170// CHECK: ret <2 x i32> [[MUL]] 171int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) { 172 return vmul_lane_s32(a, v, 1); 173} 174 175// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 { 176// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 177// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 178// CHECK: ret <4 x i32> [[MUL]] 179int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { 180 return vmulq_lane_s32(a, v, 1); 181} 182 183// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) #0 { 184// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 185// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 186// CHECK: ret <4 x i16> [[MUL]] 187uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) { 188 return vmul_lane_u16(a, v, 3); 189} 190 191// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) #0 { 192// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 193// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 194// CHECK: ret <8 x i16> [[MUL]] 195uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) { 196 return vmulq_lane_u16(a, v, 3); 197} 198 199// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) #0 { 200// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 201// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 202// CHECK: ret <2 x i32> [[MUL]] 203uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) { 204 return vmul_lane_u32(a, v, 1); 205} 206 207// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) #0 { 208// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 209// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 210// CHECK: ret <4 x i32> [[MUL]] 211uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t v) { 212 return vmulq_lane_u32(a, v, 1); 213} 214 215// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { 216// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 217// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 218// CHECK: ret <4 x i16> [[MUL]] 219int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) { 220 return vmul_laneq_s16(a, v, 7); 221} 222 223// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { 224// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 225// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 226// CHECK: ret <8 x i16> [[MUL]] 227int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) { 228 return vmulq_laneq_s16(a, v, 7); 229} 230 231// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 { 232// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 233// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 234// CHECK: ret <2 x i32> [[MUL]] 235int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { 236 return vmul_laneq_s32(a, v, 3); 237} 238 239// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { 240// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 241// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 242// CHECK: ret <4 x i32> [[MUL]] 243int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) { 244 return vmulq_laneq_s32(a, v, 3); 245} 246 247// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) #0 { 248// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 249// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 250// CHECK: ret <4 x i16> [[MUL]] 251uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) { 252 return vmul_laneq_u16(a, v, 7); 253} 254 255// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) #0 { 256// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 257// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 258// CHECK: ret <8 x i16> [[MUL]] 259uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) { 260 return vmulq_laneq_u16(a, v, 7); 261} 262 263// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) #0 { 264// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 265// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 266// CHECK: ret <2 x i32> [[MUL]] 267uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) { 268 return vmul_laneq_u32(a, v, 3); 269} 270 271// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) #0 { 272// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 273// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 274// CHECK: ret <4 x i32> [[MUL]] 275uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) { 276 return vmulq_laneq_u32(a, v, 3); 277} 278 279// CHECK-LABEL: define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 280// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 281// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 282// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 283// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 284// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1> 285// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 286// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 287// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) 288// CHECK: ret <2 x float> [[FMLA2]] 289float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 290 return vfma_lane_f32(a, b, v, 1); 291} 292 293// CHECK-LABEL: define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 294// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 295// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 296// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 297// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 298// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 299// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 300// CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 301// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) 302// CHECK: ret <4 x float> [[FMLA2]] 303float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 304 return vfmaq_lane_f32(a, b, v, 1); 305} 306 307// CHECK-LABEL: define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 308// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 309// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 310// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 311// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 312// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 313// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 314// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3> 315// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) 316// CHECK: ret <2 x float> [[TMP6]] 317float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 318 return vfma_laneq_f32(a, b, v, 3); 319} 320 321// CHECK-LABEL: define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 322// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 323// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 324// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 325// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 326// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 327// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 328// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 329// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 330// CHECK: ret <4 x float> [[TMP6]] 331float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 332 return vfmaq_laneq_f32(a, b, v, 3); 333} 334 335// CHECK-LABEL: define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 336// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b 337// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 338// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> 339// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 340// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 341// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1> 342// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 343// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 344// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) 345// CHECK: ret <2 x float> [[FMLA2]] 346float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 347 return vfms_lane_f32(a, b, v, 1); 348} 349 350// CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 351// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b 352// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 353// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> 354// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 355// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 356// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 357// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 358// CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 359// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) 360// CHECK: ret <4 x float> [[FMLA2]] 361float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 362 return vfmsq_lane_f32(a, b, v, 1); 363} 364 365// CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 366// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b 367// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 368// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> 369// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 370// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 371// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 372// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 373// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3> 374// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) 375// CHECK: ret <2 x float> [[TMP6]] 376float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 377 return vfms_laneq_f32(a, b, v, 3); 378} 379 380// CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 381// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b 382// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 383// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> 384// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 385// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 386// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 387// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 388// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 389// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 390// CHECK: ret <4 x float> [[TMP6]] 391float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 392 return vfmsq_laneq_f32(a, b, v, 3); 393} 394 395// CHECK-LABEL: define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 { 396// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 397// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 398// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> 399// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> 400// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer 401// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 402// CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 403// CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]]) 404// CHECK: ret <2 x double> [[FMLA2]] 405float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { 406 return vfmaq_lane_f64(a, b, v, 0); 407} 408 409// CHECK-LABEL: define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 { 410// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 411// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 412// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> 413// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 414// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 415// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 416// CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1> 417// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 418// CHECK: ret <2 x double> [[TMP6]] 419float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { 420 return vfmaq_laneq_f64(a, b, v, 1); 421} 422 423// CHECK-LABEL: define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 { 424// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b 425// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 426// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> 427// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> 428// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> 429// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer 430// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 431// CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 432// CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]]) 433// CHECK: ret <2 x double> [[FMLA2]] 434float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { 435 return vfmsq_lane_f64(a, b, v, 0); 436} 437 438// CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 { 439// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b 440// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 441// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> 442// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> 443// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 444// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 445// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 446// CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1> 447// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 448// CHECK: ret <2 x double> [[TMP6]] 449float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { 450 return vfmsq_laneq_f64(a, b, v, 1); 451} 452 453// CHECK-LABEL: define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) #0 { 454// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8> 455// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 456// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 457// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a) 458// CHECK: ret float [[TMP2]] 459float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) { 460 return vfmas_laneq_f32(a, b, v, 3); 461} 462 463// CHECK-LABEL: define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) #0 { 464// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b 465// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8> 466// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 467// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 468// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) 469// CHECK: ret double [[TMP2]] 470float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) { 471 return vfmsd_lane_f64(a, b, v, 0); 472} 473 474// CHECK-LABEL: define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) #0 { 475// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b 476// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8> 477// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 478// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 479// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a) 480// CHECK: ret float [[TMP2]] 481float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) { 482 return vfmss_laneq_f32(a, b, v, 3); 483} 484 485// CHECK-LABEL: define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) #0 { 486// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b 487// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8> 488// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 489// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 490// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) 491// CHECK: ret double [[TMP2]] 492float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) { 493 return vfmsd_laneq_f64(a, b, v, 1); 494} 495 496// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 497// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 498// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 499// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 500// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 501// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 502// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 503// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 504// CHECK: ret <4 x i32> [[ADD]] 505int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 506 return vmlal_lane_s16(a, b, v, 3); 507} 508 509// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 510// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 511// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 512// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 513// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 514// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 515// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 516// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 517// CHECK: ret <2 x i64> [[ADD]] 518int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 519 return vmlal_lane_s32(a, b, v, 1); 520} 521 522// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 523// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 524// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 525// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 526// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 527// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 528// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 529// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 530// CHECK: ret <4 x i32> [[ADD]] 531int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 532 return vmlal_laneq_s16(a, b, v, 7); 533} 534 535// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 536// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 537// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 538// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 539// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 540// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 541// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 542// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 543// CHECK: ret <2 x i64> [[ADD]] 544int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 545 return vmlal_laneq_s32(a, b, v, 3); 546} 547 548// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 549// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 550// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 551// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 552// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 553// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 554// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 555// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 556// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 557// CHECK: ret <4 x i32> [[ADD]] 558int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 559 return vmlal_high_lane_s16(a, b, v, 3); 560} 561 562// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 563// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 564// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 565// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 566// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 567// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 568// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 569// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 570// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 571// CHECK: ret <2 x i64> [[ADD]] 572int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 573 return vmlal_high_lane_s32(a, b, v, 1); 574} 575 576// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 577// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 578// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 579// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 580// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 581// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 582// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 583// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 584// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 585// CHECK: ret <4 x i32> [[ADD]] 586int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 587 return vmlal_high_laneq_s16(a, b, v, 7); 588} 589 590// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 591// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 592// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 593// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 594// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 595// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 596// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 597// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 598// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 599// CHECK: ret <2 x i64> [[ADD]] 600int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 601 return vmlal_high_laneq_s32(a, b, v, 3); 602} 603 604// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 605// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 606// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 607// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 608// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 609// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 610// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 611// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 612// CHECK: ret <4 x i32> [[SUB]] 613int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 614 return vmlsl_lane_s16(a, b, v, 3); 615} 616 617// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 618// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 619// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 620// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 621// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 622// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 623// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 624// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 625// CHECK: ret <2 x i64> [[SUB]] 626int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 627 return vmlsl_lane_s32(a, b, v, 1); 628} 629 630// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 631// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 632// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 633// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 634// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 635// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 636// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 637// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 638// CHECK: ret <4 x i32> [[SUB]] 639int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 640 return vmlsl_laneq_s16(a, b, v, 7); 641} 642 643// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 644// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 645// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 646// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 647// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 648// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 649// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 650// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 651// CHECK: ret <2 x i64> [[SUB]] 652int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 653 return vmlsl_laneq_s32(a, b, v, 3); 654} 655 656// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 657// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 658// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 659// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 660// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 661// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 662// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 663// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 664// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 665// CHECK: ret <4 x i32> [[SUB]] 666int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 667 return vmlsl_high_lane_s16(a, b, v, 3); 668} 669 670// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 671// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 672// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 673// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 674// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 675// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 676// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 677// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 678// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 679// CHECK: ret <2 x i64> [[SUB]] 680int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 681 return vmlsl_high_lane_s32(a, b, v, 1); 682} 683 684// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 685// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 686// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 687// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 688// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 689// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 690// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 691// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 692// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 693// CHECK: ret <4 x i32> [[SUB]] 694int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 695 return vmlsl_high_laneq_s16(a, b, v, 7); 696} 697 698// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 699// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 700// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 701// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 702// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 703// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 704// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 705// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 706// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 707// CHECK: ret <2 x i64> [[SUB]] 708int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 709 return vmlsl_high_laneq_s32(a, b, v, 3); 710} 711 712// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 713// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 714// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 715// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 716// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 717// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 718// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 719// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 720// CHECK: ret <4 x i32> [[ADD]] 721int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { 722 return vmlal_lane_u16(a, b, v, 3); 723} 724 725// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 726// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 727// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 728// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 729// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 730// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 731// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 732// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 733// CHECK: ret <2 x i64> [[ADD]] 734int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { 735 return vmlal_lane_u32(a, b, v, 1); 736} 737 738// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 739// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 740// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 741// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 742// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 743// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 744// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 745// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 746// CHECK: ret <4 x i32> [[ADD]] 747int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { 748 return vmlal_laneq_u16(a, b, v, 7); 749} 750 751// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 752// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 753// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 754// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 755// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 756// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 757// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 758// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 759// CHECK: ret <2 x i64> [[ADD]] 760int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { 761 return vmlal_laneq_u32(a, b, v, 3); 762} 763 764// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 765// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 766// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 767// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 768// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 769// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 770// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 771// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 772// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 773// CHECK: ret <4 x i32> [[ADD]] 774int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { 775 return vmlal_high_lane_u16(a, b, v, 3); 776} 777 778// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 779// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 780// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 781// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 782// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 783// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 784// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 785// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 786// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 787// CHECK: ret <2 x i64> [[ADD]] 788int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { 789 return vmlal_high_lane_u32(a, b, v, 1); 790} 791 792// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 793// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 794// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 795// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 796// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 797// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 798// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 799// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 800// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 801// CHECK: ret <4 x i32> [[ADD]] 802int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { 803 return vmlal_high_laneq_u16(a, b, v, 7); 804} 805 806// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 807// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 808// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 809// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 810// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 811// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 812// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 813// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 814// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 815// CHECK: ret <2 x i64> [[ADD]] 816int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { 817 return vmlal_high_laneq_u32(a, b, v, 3); 818} 819 820// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 821// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 822// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 823// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 824// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 825// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 826// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 827// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 828// CHECK: ret <4 x i32> [[SUB]] 829int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { 830 return vmlsl_lane_u16(a, b, v, 3); 831} 832 833// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 834// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 835// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 836// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 837// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 838// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 839// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 840// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 841// CHECK: ret <2 x i64> [[SUB]] 842int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { 843 return vmlsl_lane_u32(a, b, v, 1); 844} 845 846// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 847// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 848// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 849// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 850// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 851// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 852// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 853// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 854// CHECK: ret <4 x i32> [[SUB]] 855int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { 856 return vmlsl_laneq_u16(a, b, v, 7); 857} 858 859// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 860// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 861// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 862// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 863// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 864// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 865// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 866// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 867// CHECK: ret <2 x i64> [[SUB]] 868int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { 869 return vmlsl_laneq_u32(a, b, v, 3); 870} 871 872// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 873// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 874// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 875// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 876// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 877// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 878// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 879// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 880// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 881// CHECK: ret <4 x i32> [[SUB]] 882int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { 883 return vmlsl_high_lane_u16(a, b, v, 3); 884} 885 886// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 887// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 888// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 889// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 890// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 891// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 892// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 893// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 894// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 895// CHECK: ret <2 x i64> [[SUB]] 896int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { 897 return vmlsl_high_lane_u32(a, b, v, 1); 898} 899 900// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 901// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 902// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 903// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 904// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 905// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 906// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 907// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 908// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 909// CHECK: ret <4 x i32> [[SUB]] 910int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { 911 return vmlsl_high_laneq_u16(a, b, v, 7); 912} 913 914// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 915// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 916// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 917// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 918// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 919// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 920// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 921// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 922// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 923// CHECK: ret <2 x i64> [[SUB]] 924int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { 925 return vmlsl_high_laneq_u32(a, b, v, 3); 926} 927 928// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 { 929// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 930// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 931// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 932// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 933// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 934// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 935// CHECK: ret <4 x i32> [[VMULL2_I]] 936int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) { 937 return vmull_lane_s16(a, v, 3); 938} 939 940// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 { 941// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 942// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 943// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 944// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 945// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 946// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 947// CHECK: ret <2 x i64> [[VMULL2_I]] 948int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { 949 return vmull_lane_s32(a, v, 1); 950} 951 952// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) #0 { 953// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 954// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 955// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 956// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 957// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 958// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 959// CHECK: ret <4 x i32> [[VMULL2_I]] 960uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) { 961 return vmull_lane_u16(a, v, 3); 962} 963 964// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) #0 { 965// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 966// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 967// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 968// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 969// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 970// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 971// CHECK: ret <2 x i64> [[VMULL2_I]] 972uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { 973 return vmull_lane_u32(a, v, 1); 974} 975 976// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 { 977// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 978// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 979// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 980// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 981// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 982// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 983// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 984// CHECK: ret <4 x i32> [[VMULL2_I]] 985int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) { 986 return vmull_high_lane_s16(a, v, 3); 987} 988 989// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 { 990// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 991// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 992// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 993// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 994// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 995// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 996// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 997// CHECK: ret <2 x i64> [[VMULL2_I]] 998int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { 999 return vmull_high_lane_s32(a, v, 1); 1000} 1001 1002// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) #0 { 1003// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1004// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1005// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1006// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1007// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1008// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1009// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 1010// CHECK: ret <4 x i32> [[VMULL2_I]] 1011uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) { 1012 return vmull_high_lane_u16(a, v, 3); 1013} 1014 1015// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) #0 { 1016// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 1017// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1018// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1019// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1020// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1021// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1022// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 1023// CHECK: ret <2 x i64> [[VMULL2_I]] 1024uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { 1025 return vmull_high_lane_u32(a, v, 1); 1026} 1027 1028// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { 1029// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1030// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1031// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1032// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1033// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1034// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 1035// CHECK: ret <4 x i32> [[VMULL2_I]] 1036int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) { 1037 return vmull_laneq_s16(a, v, 7); 1038} 1039 1040// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 { 1041// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 1042// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1043// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1044// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1045// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1046// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 1047// CHECK: ret <2 x i64> [[VMULL2_I]] 1048int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) { 1049 return vmull_laneq_s32(a, v, 3); 1050} 1051 1052// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) #0 { 1053// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1054// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1055// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1056// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1057// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1058// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 1059// CHECK: ret <4 x i32> [[VMULL2_I]] 1060uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) { 1061 return vmull_laneq_u16(a, v, 7); 1062} 1063 1064// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) #0 { 1065// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 1066// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1067// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1068// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1069// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1070// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 1071// CHECK: ret <2 x i64> [[VMULL2_I]] 1072uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { 1073 return vmull_laneq_u32(a, v, 3); 1074} 1075 1076// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { 1077// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1078// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1079// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1080// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1081// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1082// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1083// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 1084// CHECK: ret <4 x i32> [[VMULL2_I]] 1085int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { 1086 return vmull_high_laneq_s16(a, v, 7); 1087} 1088 1089// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { 1090// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 1091// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 1092// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1093// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1094// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1095// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1096// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 1097// CHECK: ret <2 x i64> [[VMULL2_I]] 1098int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { 1099 return vmull_high_laneq_s32(a, v, 3); 1100} 1101 1102// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) #0 { 1103// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1104// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1105// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1106// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1107// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1108// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1109// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 1110// CHECK: ret <4 x i32> [[VMULL2_I]] 1111uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { 1112 return vmull_high_laneq_u16(a, v, 7); 1113} 1114 1115// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) #0 { 1116// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 1117// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 1118// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1119// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1120// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1121// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1122// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 1123// CHECK: ret <2 x i64> [[VMULL2_I]] 1124uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { 1125 return vmull_high_laneq_u32(a, v, 3); 1126} 1127 1128// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 1129// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1130// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1131// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 1132// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1133// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1134// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 1135// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 1136// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1137// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 1138// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 1139int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 1140 return vqdmlal_lane_s16(a, b, v, 3); 1141} 1142 1143// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 1144// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1145// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1146// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 1147// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1148// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1149// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1150// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 1151// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1152// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 1153// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 1154int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 1155 return vqdmlal_lane_s32(a, b, v, 1); 1156} 1157 1158// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 1159// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1160// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1161// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1162// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1163// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1164// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1165// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 1166// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 1167// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1168// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 1169// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 1170int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 1171 return vqdmlal_high_lane_s16(a, b, v, 3); 1172} 1173 1174// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 1175// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 1176// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1177// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1178// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1179// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1180// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1181// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1182// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 1183// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1184// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 1185// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 1186int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 1187 return vqdmlal_high_lane_s32(a, b, v, 1); 1188} 1189 1190// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 1191// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1192// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1193// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 1194// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1195// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1196// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 1197// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 1198// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1199// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 1200// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 1201int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 1202 return vqdmlsl_lane_s16(a, b, v, 3); 1203} 1204 1205// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 1206// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1207// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1208// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 1209// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1210// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1211// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1212// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 1213// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1214// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 1215// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 1216int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 1217 return vqdmlsl_lane_s32(a, b, v, 1); 1218} 1219 1220// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 1221// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1222// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1223// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1224// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1225// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1226// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1227// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 1228// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 1229// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1230// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 1231// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 1232int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 1233 return vqdmlsl_high_lane_s16(a, b, v, 3); 1234} 1235 1236// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 1237// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 1238// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1239// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1240// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1241// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1242// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1243// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1244// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 1245// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1246// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 1247// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 1248int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 1249 return vqdmlsl_high_lane_s32(a, b, v, 1); 1250} 1251 1252// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 { 1253// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1254// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1255// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1256// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1257// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1258// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 1259// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1260// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 1261// CHECK: ret <4 x i32> [[TMP2]] 1262int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { 1263 return vqdmull_lane_s16(a, v, 3); 1264} 1265 1266// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 { 1267// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1268// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1269// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1270// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1271// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1272// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 1273// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1274// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 1275// CHECK: ret <2 x i64> [[TMP2]] 1276int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { 1277 return vqdmull_lane_s32(a, v, 1); 1278} 1279 1280// CHECK-LABEL: define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { 1281// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1282// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1283// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1284// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1285// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1286// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 1287// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1288// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 1289// CHECK: ret <4 x i32> [[TMP2]] 1290int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { 1291 return vqdmull_laneq_s16(a, v, 3); 1292} 1293 1294// CHECK-LABEL: define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 { 1295// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 1296// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1297// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1298// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1299// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1300// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 1301// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1302// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 1303// CHECK: ret <2 x i64> [[TMP2]] 1304int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { 1305 return vqdmull_laneq_s32(a, v, 3); 1306} 1307 1308// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 { 1309// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1310// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1311// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1312// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1313// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1314// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1315// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 1316// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1317// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 1318// CHECK: ret <4 x i32> [[TMP2]] 1319int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { 1320 return vqdmull_high_lane_s16(a, v, 3); 1321} 1322 1323// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 { 1324// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 1325// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1326// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1327// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1328// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1329// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1330// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 1331// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1332// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 1333// CHECK: ret <2 x i64> [[TMP2]] 1334int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { 1335 return vqdmull_high_lane_s32(a, v, 1); 1336} 1337 1338// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { 1339// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1340// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1341// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1342// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1343// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1344// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1345// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 1346// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1347// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 1348// CHECK: ret <4 x i32> [[TMP2]] 1349int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) { 1350 return vqdmull_high_laneq_s16(a, v, 7); 1351} 1352 1353// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { 1354// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 1355// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 1356// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1357// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1358// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1359// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1360// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 1361// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1362// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 1363// CHECK: ret <2 x i64> [[TMP2]] 1364int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) { 1365 return vqdmull_high_laneq_s32(a, v, 3); 1366} 1367 1368// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 { 1369// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1370// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1371// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1372// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1373// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1374// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2 1375// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 1376// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 1377// CHECK: ret <4 x i16> [[TMP2]] 1378int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) { 1379 return vqdmulh_lane_s16(a, v, 3); 1380} 1381 1382// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 { 1383// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1384// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1385// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 1386// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1387// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1388// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2 1389// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 1390// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 1391// CHECK: ret <8 x i16> [[TMP2]] 1392int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) { 1393 return vqdmulhq_lane_s16(a, v, 3); 1394} 1395 1396// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 { 1397// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1398// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1399// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1400// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1401// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1402// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2 1403// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 1404// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 1405// CHECK: ret <2 x i32> [[TMP2]] 1406int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) { 1407 return vqdmulh_lane_s32(a, v, 1); 1408} 1409 1410// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 { 1411// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1412// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1413// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 1414// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1415// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1416// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2 1417// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 1418// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 1419// CHECK: ret <4 x i32> [[TMP2]] 1420int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) { 1421 return vqdmulhq_lane_s32(a, v, 1); 1422} 1423 1424// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) #0 { 1425// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1426// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1427// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 1428// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1429// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1430// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2 1431// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 1432// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> 1433// CHECK: ret <4 x i16> [[TMP2]] 1434int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) { 1435 return vqrdmulh_lane_s16(a, v, 3); 1436} 1437 1438// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) #0 { 1439// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1440// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1441// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 1442// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1443// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1444// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2 1445// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 1446// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> 1447// CHECK: ret <8 x i16> [[TMP2]] 1448int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) { 1449 return vqrdmulhq_lane_s16(a, v, 3); 1450} 1451 1452// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) #0 { 1453// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 1454// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1455// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 1456// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1457// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1458// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2 1459// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 1460// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> 1461// CHECK: ret <2 x i32> [[TMP2]] 1462int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) { 1463 return vqrdmulh_lane_s32(a, v, 1); 1464} 1465 1466// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) #0 { 1467// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1468// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1469// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 1470// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1471// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1472// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2 1473// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 1474// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> 1475// CHECK: ret <4 x i32> [[TMP2]] 1476int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) { 1477 return vqrdmulhq_lane_s32(a, v, 1); 1478} 1479 1480// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) #0 { 1481// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> 1482// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] 1483// CHECK: ret <2 x float> [[MUL]] 1484float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) { 1485 return vmul_lane_f32(a, v, 1); 1486} 1487 1488 1489// CHECK-LABEL: define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) #0 { 1490// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1491// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v to <8 x i8> 1492// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double 1493// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 1494// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 1495// CHECK: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] 1496// CHECK: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> 1497// CHECK: ret <1 x double> [[TMP5]] 1498float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) { 1499 return vmul_lane_f64(a, v, 0); 1500} 1501 1502 1503// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) #0 { 1504// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1505// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] 1506// CHECK: ret <4 x float> [[MUL]] 1507float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) { 1508 return vmulq_lane_f32(a, v, 1); 1509} 1510 1511// CHECK-LABEL: define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) #0 { 1512// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer 1513// CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]] 1514// CHECK: ret <2 x double> [[MUL]] 1515float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) { 1516 return vmulq_lane_f64(a, v, 0); 1517} 1518 1519// CHECK-LABEL: define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) #0 { 1520// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> 1521// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] 1522// CHECK: ret <2 x float> [[MUL]] 1523float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) { 1524 return vmul_laneq_f32(a, v, 3); 1525} 1526 1527// CHECK-LABEL: define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) #0 { 1528// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1529// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8> 1530// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double 1531// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1532// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 1533// CHECK: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] 1534// CHECK: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> 1535// CHECK: ret <1 x double> [[TMP5]] 1536float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) { 1537 return vmul_laneq_f64(a, v, 1); 1538} 1539 1540 1541// CHECK-LABEL: define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) #0 { 1542// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1543// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] 1544// CHECK: ret <4 x float> [[MUL]] 1545float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) { 1546 return vmulq_laneq_f32(a, v, 3); 1547} 1548 1549// CHECK-LABEL: define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) #0 { 1550// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> <i32 1, i32 1> 1551// CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]] 1552// CHECK: ret <2 x double> [[MUL]] 1553float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) { 1554 return vmulq_laneq_f64(a, v, 1); 1555} 1556 1557// CHECK-LABEL: define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) #0 { 1558// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> 1559// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1560// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> 1561// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1562// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1563// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2 1564// CHECK: ret <2 x float> [[VMULX2_I]] 1565float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) { 1566 return vmulx_lane_f32(a, v, 1); 1567} 1568 1569// CHECK-LABEL: define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) #0 { 1570// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1571// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1572// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> 1573// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1574// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1575// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2 1576// CHECK: ret <4 x float> [[VMULX2_I]] 1577float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) { 1578 return vmulxq_lane_f32(a, v, 1); 1579} 1580 1581// CHECK-LABEL: define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) #0 { 1582// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer 1583// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 1584// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> 1585// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1586// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1587// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2 1588// CHECK: ret <2 x double> [[VMULX2_I]] 1589float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) { 1590 return vmulxq_lane_f64(a, v, 0); 1591} 1592 1593// CHECK-LABEL: define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) #0 { 1594// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> 1595// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1596// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> 1597// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1598// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1599// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2 1600// CHECK: ret <2 x float> [[VMULX2_I]] 1601float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) { 1602 return vmulx_laneq_f32(a, v, 3); 1603} 1604 1605// CHECK-LABEL: define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) #0 { 1606// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1607// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1608// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> 1609// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1610// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1611// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2 1612// CHECK: ret <4 x float> [[VMULX2_I]] 1613float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) { 1614 return vmulxq_laneq_f32(a, v, 3); 1615} 1616 1617// CHECK-LABEL: define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) #0 { 1618// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> <i32 1, i32 1> 1619// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 1620// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> 1621// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1622// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1623// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2 1624// CHECK: ret <2 x double> [[VMULX2_I]] 1625float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) { 1626 return vmulxq_laneq_f64(a, v, 1); 1627} 1628 1629// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 1630// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 1631// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 1632// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 1633// CHECK: ret <4 x i16> [[ADD]] 1634int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { 1635 return vmla_lane_s16(a, b, v, 0); 1636} 1637 1638// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 1639// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 1640// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 1641// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 1642// CHECK: ret <8 x i16> [[ADD]] 1643int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { 1644 return vmlaq_lane_s16(a, b, v, 0); 1645} 1646 1647// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 1648// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 1649// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 1650// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 1651// CHECK: ret <2 x i32> [[ADD]] 1652int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { 1653 return vmla_lane_s32(a, b, v, 0); 1654} 1655 1656// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 1657// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 1658// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 1659// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 1660// CHECK: ret <4 x i32> [[ADD]] 1661int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { 1662 return vmlaq_lane_s32(a, b, v, 0); 1663} 1664 1665// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 1666// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 1667// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 1668// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 1669// CHECK: ret <4 x i16> [[ADD]] 1670int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { 1671 return vmla_laneq_s16(a, b, v, 0); 1672} 1673 1674// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 1675// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 1676// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 1677// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 1678// CHECK: ret <8 x i16> [[ADD]] 1679int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { 1680 return vmlaq_laneq_s16(a, b, v, 0); 1681} 1682 1683// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 1684// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 1685// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 1686// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 1687// CHECK: ret <2 x i32> [[ADD]] 1688int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { 1689 return vmla_laneq_s32(a, b, v, 0); 1690} 1691 1692// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 1693// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 1694// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 1695// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 1696// CHECK: ret <4 x i32> [[ADD]] 1697int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { 1698 return vmlaq_laneq_s32(a, b, v, 0); 1699} 1700 1701// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 1702// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 1703// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 1704// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 1705// CHECK: ret <4 x i16> [[SUB]] 1706int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { 1707 return vmls_lane_s16(a, b, v, 0); 1708} 1709 1710// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 1711// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 1712// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 1713// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 1714// CHECK: ret <8 x i16> [[SUB]] 1715int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { 1716 return vmlsq_lane_s16(a, b, v, 0); 1717} 1718 1719// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 1720// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 1721// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 1722// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 1723// CHECK: ret <2 x i32> [[SUB]] 1724int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { 1725 return vmls_lane_s32(a, b, v, 0); 1726} 1727 1728// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 1729// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 1730// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 1731// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 1732// CHECK: ret <4 x i32> [[SUB]] 1733int32x4_t test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { 1734 return vmlsq_lane_s32(a, b, v, 0); 1735} 1736 1737// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 1738// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 1739// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 1740// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 1741// CHECK: ret <4 x i16> [[SUB]] 1742int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { 1743 return vmls_laneq_s16(a, b, v, 0); 1744} 1745 1746// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 1747// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 1748// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 1749// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 1750// CHECK: ret <8 x i16> [[SUB]] 1751int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { 1752 return vmlsq_laneq_s16(a, b, v, 0); 1753} 1754 1755// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 1756// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 1757// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 1758// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 1759// CHECK: ret <2 x i32> [[SUB]] 1760int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { 1761 return vmls_laneq_s32(a, b, v, 0); 1762} 1763 1764// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 1765// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 1766// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 1767// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 1768// CHECK: ret <4 x i32> [[SUB]] 1769int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { 1770 return vmlsq_laneq_s32(a, b, v, 0); 1771} 1772 1773// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { 1774// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 1775// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 1776// CHECK: ret <4 x i16> [[MUL]] 1777int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) { 1778 return vmul_lane_s16(a, v, 0); 1779} 1780 1781// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { 1782// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 1783// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 1784// CHECK: ret <8 x i16> [[MUL]] 1785int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) { 1786 return vmulq_lane_s16(a, v, 0); 1787} 1788 1789// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { 1790// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 1791// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 1792// CHECK: ret <2 x i32> [[MUL]] 1793int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) { 1794 return vmul_lane_s32(a, v, 0); 1795} 1796 1797// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { 1798// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 1799// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 1800// CHECK: ret <4 x i32> [[MUL]] 1801int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) { 1802 return vmulq_lane_s32(a, v, 0); 1803} 1804 1805// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) #0 { 1806// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 1807// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 1808// CHECK: ret <4 x i16> [[MUL]] 1809uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) { 1810 return vmul_lane_u16(a, v, 0); 1811} 1812 1813// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) #0 { 1814// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 1815// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 1816// CHECK: ret <8 x i16> [[MUL]] 1817uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) { 1818 return vmulq_lane_u16(a, v, 0); 1819} 1820 1821// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) #0 { 1822// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 1823// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 1824// CHECK: ret <2 x i32> [[MUL]] 1825uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) { 1826 return vmul_lane_u32(a, v, 0); 1827} 1828 1829// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) #0 { 1830// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 1831// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 1832// CHECK: ret <4 x i32> [[MUL]] 1833uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) { 1834 return vmulq_lane_u32(a, v, 0); 1835} 1836 1837// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 { 1838// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 1839// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 1840// CHECK: ret <4 x i16> [[MUL]] 1841int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) { 1842 return vmul_laneq_s16(a, v, 0); 1843} 1844 1845// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 { 1846// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 1847// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 1848// CHECK: ret <8 x i16> [[MUL]] 1849int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) { 1850 return vmulq_laneq_s16(a, v, 0); 1851} 1852 1853// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 { 1854// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 1855// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 1856// CHECK: ret <2 x i32> [[MUL]] 1857int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) { 1858 return vmul_laneq_s32(a, v, 0); 1859} 1860 1861// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 { 1862// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 1863// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 1864// CHECK: ret <4 x i32> [[MUL]] 1865int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) { 1866 return vmulq_laneq_s32(a, v, 0); 1867} 1868 1869// CHECK-LABEL: define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) #0 { 1870// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 1871// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] 1872// CHECK: ret <4 x i16> [[MUL]] 1873uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) { 1874 return vmul_laneq_u16(a, v, 0); 1875} 1876 1877// CHECK-LABEL: define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) #0 { 1878// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 1879// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] 1880// CHECK: ret <8 x i16> [[MUL]] 1881uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) { 1882 return vmulq_laneq_u16(a, v, 0); 1883} 1884 1885// CHECK-LABEL: define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) #0 { 1886// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 1887// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] 1888// CHECK: ret <2 x i32> [[MUL]] 1889uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) { 1890 return vmul_laneq_u32(a, v, 0); 1891} 1892 1893// CHECK-LABEL: define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) #0 { 1894// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 1895// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] 1896// CHECK: ret <4 x i32> [[MUL]] 1897uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) { 1898 return vmulq_laneq_u32(a, v, 0); 1899} 1900 1901// CHECK-LABEL: define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 1902// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1903// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1904// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 1905// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 1906// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer 1907// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1908// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1909// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) 1910// CHECK: ret <2 x float> [[FMLA2]] 1911float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 1912 return vfma_lane_f32(a, b, v, 0); 1913} 1914 1915// CHECK-LABEL: define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 1916// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1917// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1918// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 1919// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 1920// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer 1921// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1922// CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1923// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) 1924// CHECK: ret <4 x float> [[FMLA2]] 1925float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 1926 return vfmaq_lane_f32(a, b, v, 0); 1927} 1928 1929// CHECK-LABEL: define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 1930// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1931// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1932// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 1933// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1934// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1935// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 1936// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer 1937// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) 1938// CHECK: ret <2 x float> [[TMP6]] 1939float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 1940 return vfma_laneq_f32(a, b, v, 0); 1941} 1942 1943// CHECK-LABEL: define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 1944// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1945// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1946// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 1947// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1948// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1949// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 1950// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer 1951// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 1952// CHECK: ret <4 x float> [[TMP6]] 1953float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 1954 return vfmaq_laneq_f32(a, b, v, 0); 1955} 1956 1957// CHECK-LABEL: define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 1958// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b 1959// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1960// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> 1961// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 1962// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 1963// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer 1964// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1965// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1966// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) 1967// CHECK: ret <2 x float> [[FMLA2]] 1968float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 1969 return vfms_lane_f32(a, b, v, 0); 1970} 1971 1972// CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 1973// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b 1974// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1975// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> 1976// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> 1977// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 1978// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer 1979// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1980// CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1981// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) 1982// CHECK: ret <4 x float> [[FMLA2]] 1983float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 1984 return vfmsq_lane_f32(a, b, v, 0); 1985} 1986 1987// CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 1988// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b 1989// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1990// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> 1991// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 1992// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1993// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1994// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 1995// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer 1996// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) 1997// CHECK: ret <2 x float> [[TMP6]] 1998float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 1999 return vfms_laneq_f32(a, b, v, 0); 2000} 2001 2002// CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 2003// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b 2004// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 2005// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> 2006// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> 2007// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2008// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 2009// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 2010// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer 2011// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 2012// CHECK: ret <4 x float> [[TMP6]] 2013float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 2014 return vfmsq_laneq_f32(a, b, v, 0); 2015} 2016 2017// CHECK-LABEL: define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 { 2018// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 2019// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 2020// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> 2021// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 2022// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 2023// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 2024// CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer 2025// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 2026// CHECK: ret <2 x double> [[TMP6]] 2027float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { 2028 return vfmaq_laneq_f64(a, b, v, 0); 2029} 2030 2031// CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 { 2032// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b 2033// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 2034// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> 2035// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> 2036// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 2037// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 2038// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 2039// CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer 2040// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 2041// CHECK: ret <2 x double> [[TMP6]] 2042float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { 2043 return vfmsq_laneq_f64(a, b, v, 0); 2044} 2045 2046// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 2047// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2048// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2049// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2050// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2051// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2052// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2053// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2054// CHECK: ret <4 x i32> [[ADD]] 2055int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2056 return vmlal_lane_s16(a, b, v, 0); 2057} 2058 2059// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 2060// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2061// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2062// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2063// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2064// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2065// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2066// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2067// CHECK: ret <2 x i64> [[ADD]] 2068int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2069 return vmlal_lane_s32(a, b, v, 0); 2070} 2071 2072// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 2073// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2074// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2075// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2076// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2077// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2078// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2079// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2080// CHECK: ret <4 x i32> [[ADD]] 2081int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2082 return vmlal_laneq_s16(a, b, v, 0); 2083} 2084 2085// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 2086// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2087// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2088// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2089// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2090// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2091// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2092// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2093// CHECK: ret <2 x i64> [[ADD]] 2094int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2095 return vmlal_laneq_s32(a, b, v, 0); 2096} 2097 2098// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 2099// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2100// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2101// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2102// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2103// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2104// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2105// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2106// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2107// CHECK: ret <4 x i32> [[ADD]] 2108int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2109 return vmlal_high_lane_s16(a, b, v, 0); 2110} 2111 2112// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 2113// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2114// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2115// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2116// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2117// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2118// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2119// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2120// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2121// CHECK: ret <2 x i64> [[ADD]] 2122int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2123 return vmlal_high_lane_s32(a, b, v, 0); 2124} 2125 2126// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 2127// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2128// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2129// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2130// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2131// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2132// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2133// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2134// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2135// CHECK: ret <4 x i32> [[ADD]] 2136int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2137 return vmlal_high_laneq_s16(a, b, v, 0); 2138} 2139 2140// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 2141// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2142// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2143// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2144// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2145// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2146// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2147// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2148// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2149// CHECK: ret <2 x i64> [[ADD]] 2150int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2151 return vmlal_high_laneq_s32(a, b, v, 0); 2152} 2153 2154// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 2155// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2156// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2157// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2158// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2159// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2160// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2161// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2162// CHECK: ret <4 x i32> [[SUB]] 2163int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2164 return vmlsl_lane_s16(a, b, v, 0); 2165} 2166 2167// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 2168// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2169// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2170// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2171// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2172// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2173// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2174// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2175// CHECK: ret <2 x i64> [[SUB]] 2176int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2177 return vmlsl_lane_s32(a, b, v, 0); 2178} 2179 2180// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 2181// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2182// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2183// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2184// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2185// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2186// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2187// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2188// CHECK: ret <4 x i32> [[SUB]] 2189int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2190 return vmlsl_laneq_s16(a, b, v, 0); 2191} 2192 2193// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 2194// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2195// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2196// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2197// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2198// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2199// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2200// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2201// CHECK: ret <2 x i64> [[SUB]] 2202int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2203 return vmlsl_laneq_s32(a, b, v, 0); 2204} 2205 2206// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 2207// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2208// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2209// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2210// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2211// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2212// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2213// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2214// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2215// CHECK: ret <4 x i32> [[SUB]] 2216int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2217 return vmlsl_high_lane_s16(a, b, v, 0); 2218} 2219 2220// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 2221// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2222// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2223// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2224// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2225// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2226// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2227// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2228// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2229// CHECK: ret <2 x i64> [[SUB]] 2230int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2231 return vmlsl_high_lane_s32(a, b, v, 0); 2232} 2233 2234// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 2235// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2236// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2237// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2238// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2239// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2240// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2241// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2242// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2243// CHECK: ret <4 x i32> [[SUB]] 2244int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2245 return vmlsl_high_laneq_s16(a, b, v, 0); 2246} 2247 2248// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 2249// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2250// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2251// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2252// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2253// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2254// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2255// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2256// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2257// CHECK: ret <2 x i64> [[SUB]] 2258int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2259 return vmlsl_high_laneq_s32(a, b, v, 0); 2260} 2261 2262// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 2263// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2264// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2265// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2266// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2267// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2268// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2269// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2270// CHECK: ret <4 x i32> [[ADD]] 2271int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2272 return vmlal_lane_u16(a, b, v, 0); 2273} 2274 2275// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 2276// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2277// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2278// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2279// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2280// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2281// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2282// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2283// CHECK: ret <2 x i64> [[ADD]] 2284int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2285 return vmlal_lane_u32(a, b, v, 0); 2286} 2287 2288// CHECK-LABEL: define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 2289// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2290// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2291// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2292// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2293// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2294// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2295// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2296// CHECK: ret <4 x i32> [[ADD]] 2297int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2298 return vmlal_laneq_u16(a, b, v, 0); 2299} 2300 2301// CHECK-LABEL: define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 2302// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2303// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2304// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2305// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2306// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2307// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2308// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2309// CHECK: ret <2 x i64> [[ADD]] 2310int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2311 return vmlal_laneq_u32(a, b, v, 0); 2312} 2313 2314// CHECK-LABEL: define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 2315// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2316// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2317// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2318// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2319// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2320// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2321// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2322// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2323// CHECK: ret <4 x i32> [[ADD]] 2324int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2325 return vmlal_high_lane_u16(a, b, v, 0); 2326} 2327 2328// CHECK-LABEL: define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 2329// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2330// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2331// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2332// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2333// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2334// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2335// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2336// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2337// CHECK: ret <2 x i64> [[ADD]] 2338int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2339 return vmlal_high_lane_u32(a, b, v, 0); 2340} 2341 2342// CHECK-LABEL: define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 2343// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2344// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2345// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2346// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2347// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2348// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2349// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2350// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] 2351// CHECK: ret <4 x i32> [[ADD]] 2352int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2353 return vmlal_high_laneq_u16(a, b, v, 0); 2354} 2355 2356// CHECK-LABEL: define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 2357// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2358// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2359// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2360// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2361// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2362// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2363// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2364// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] 2365// CHECK: ret <2 x i64> [[ADD]] 2366int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2367 return vmlal_high_laneq_u32(a, b, v, 0); 2368} 2369 2370// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 2371// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2372// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2373// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2374// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2375// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2376// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2377// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2378// CHECK: ret <4 x i32> [[SUB]] 2379int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2380 return vmlsl_lane_u16(a, b, v, 0); 2381} 2382 2383// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 2384// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2385// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2386// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2387// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2388// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2389// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2390// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2391// CHECK: ret <2 x i64> [[SUB]] 2392int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2393 return vmlsl_lane_u32(a, b, v, 0); 2394} 2395 2396// CHECK-LABEL: define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 2397// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2398// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2399// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2400// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2401// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2402// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2403// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2404// CHECK: ret <4 x i32> [[SUB]] 2405int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2406 return vmlsl_laneq_u16(a, b, v, 0); 2407} 2408 2409// CHECK-LABEL: define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 2410// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2411// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2412// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2413// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2414// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2415// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2416// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2417// CHECK: ret <2 x i64> [[SUB]] 2418int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2419 return vmlsl_laneq_u32(a, b, v, 0); 2420} 2421 2422// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 2423// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2424// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2425// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2426// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2427// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2428// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2429// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2430// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2431// CHECK: ret <4 x i32> [[SUB]] 2432int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2433 return vmlsl_high_lane_u16(a, b, v, 0); 2434} 2435 2436// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 2437// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2438// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2439// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2440// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2441// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2442// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2443// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2444// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2445// CHECK: ret <2 x i64> [[SUB]] 2446int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2447 return vmlsl_high_lane_u32(a, b, v, 0); 2448} 2449 2450// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 2451// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2452// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2453// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2454// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2455// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2456// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2457// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2458// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] 2459// CHECK: ret <4 x i32> [[SUB]] 2460int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2461 return vmlsl_high_laneq_u16(a, b, v, 0); 2462} 2463 2464// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 2465// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2466// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2467// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2468// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2469// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2470// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2471// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2472// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] 2473// CHECK: ret <2 x i64> [[SUB]] 2474int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2475 return vmlsl_high_laneq_u32(a, b, v, 0); 2476} 2477 2478// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { 2479// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2480// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2481// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2482// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2483// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2484// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2485// CHECK: ret <4 x i32> [[VMULL2_I]] 2486int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { 2487 return vmull_lane_s16(a, v, 0); 2488} 2489 2490// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { 2491// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2492// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2493// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2494// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2495// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2496// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2497// CHECK: ret <2 x i64> [[VMULL2_I]] 2498int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { 2499 return vmull_lane_s32(a, v, 0); 2500} 2501 2502// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) #0 { 2503// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2504// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2505// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2506// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2507// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2508// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2509// CHECK: ret <4 x i32> [[VMULL2_I]] 2510uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { 2511 return vmull_lane_u16(a, v, 0); 2512} 2513 2514// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) #0 { 2515// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2516// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2517// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2518// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2519// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2520// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2521// CHECK: ret <2 x i64> [[VMULL2_I]] 2522uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { 2523 return vmull_lane_u32(a, v, 0); 2524} 2525 2526// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { 2527// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2528// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2529// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2530// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2531// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2532// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2533// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2534// CHECK: ret <4 x i32> [[VMULL2_I]] 2535int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { 2536 return vmull_high_lane_s16(a, v, 0); 2537} 2538 2539// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { 2540// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 2541// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2542// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2543// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2544// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2545// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2546// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2547// CHECK: ret <2 x i64> [[VMULL2_I]] 2548int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { 2549 return vmull_high_lane_s32(a, v, 0); 2550} 2551 2552// CHECK-LABEL: define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) #0 { 2553// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2554// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2555// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2556// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2557// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2558// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2559// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2560// CHECK: ret <4 x i32> [[VMULL2_I]] 2561uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { 2562 return vmull_high_lane_u16(a, v, 0); 2563} 2564 2565// CHECK-LABEL: define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) #0 { 2566// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 2567// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2568// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2569// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2570// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2571// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2572// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2573// CHECK: ret <2 x i64> [[VMULL2_I]] 2574uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { 2575 return vmull_high_lane_u32(a, v, 0); 2576} 2577 2578// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 { 2579// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2580// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2581// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2582// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2583// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2584// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2585// CHECK: ret <4 x i32> [[VMULL2_I]] 2586int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { 2587 return vmull_laneq_s16(a, v, 0); 2588} 2589 2590// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 { 2591// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2592// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2593// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2594// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2595// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2596// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2597// CHECK: ret <2 x i64> [[VMULL2_I]] 2598int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { 2599 return vmull_laneq_s32(a, v, 0); 2600} 2601 2602// CHECK-LABEL: define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) #0 { 2603// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2604// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2605// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2606// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2607// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2608// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2609// CHECK: ret <4 x i32> [[VMULL2_I]] 2610uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { 2611 return vmull_laneq_u16(a, v, 0); 2612} 2613 2614// CHECK-LABEL: define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) #0 { 2615// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2616// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2617// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2618// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2619// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2620// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2621// CHECK: ret <2 x i64> [[VMULL2_I]] 2622uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { 2623 return vmull_laneq_u32(a, v, 0); 2624} 2625 2626// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 { 2627// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2628// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2629// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2630// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2631// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2632// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2633// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2634// CHECK: ret <4 x i32> [[VMULL2_I]] 2635int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { 2636 return vmull_high_laneq_s16(a, v, 0); 2637} 2638 2639// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 { 2640// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 2641// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2642// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2643// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2644// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2645// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2646// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2647// CHECK: ret <2 x i64> [[VMULL2_I]] 2648int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { 2649 return vmull_high_laneq_s32(a, v, 0); 2650} 2651 2652// CHECK-LABEL: define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) #0 { 2653// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2654// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2655// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2656// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2657// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2658// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2659// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #2 2660// CHECK: ret <4 x i32> [[VMULL2_I]] 2661uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { 2662 return vmull_high_laneq_u16(a, v, 0); 2663} 2664 2665// CHECK-LABEL: define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) #0 { 2666// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 2667// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2668// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2669// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2670// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2671// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2672// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #2 2673// CHECK: ret <2 x i64> [[VMULL2_I]] 2674uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { 2675 return vmull_high_laneq_u32(a, v, 0); 2676} 2677 2678// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 2679// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2680// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2681// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2682// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2683// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2684// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 2685// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 2686// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2687// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 2688// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 2689int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2690 return vqdmlal_lane_s16(a, b, v, 0); 2691} 2692 2693// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 2694// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2695// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 2696// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2697// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2698// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2699// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 2700// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 2701// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 2702// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 2703// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 2704int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2705 return vqdmlal_lane_s32(a, b, v, 0); 2706} 2707 2708// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 2709// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2710// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2711// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2712// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2713// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2714// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2715// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 2716// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 2717// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2718// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 2719// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 2720int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2721 return vqdmlal_high_lane_s16(a, b, v, 0); 2722} 2723 2724// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 2725// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2726// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2727// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 2728// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2729// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2730// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2731// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 2732// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 2733// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 2734// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 2735// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 2736int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2737 return vqdmlal_high_lane_s32(a, b, v, 0); 2738} 2739 2740// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) #0 { 2741// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2742// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2743// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2744// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2745// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2746// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 2747// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 2748// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2749// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 2750// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 2751int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2752 return vqdmlsl_lane_s16(a, b, v, 0); 2753} 2754 2755// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) #0 { 2756// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2757// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 2758// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2759// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2760// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2761// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 2762// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 2763// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 2764// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 2765// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 2766int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2767 return vqdmlsl_lane_s32(a, b, v, 0); 2768} 2769 2770// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) #0 { 2771// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2772// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2773// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2774// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2775// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2776// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2777// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 2778// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 2779// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2780// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 2781// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 2782int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2783 return vqdmlsl_high_lane_s16(a, b, v, 0); 2784} 2785 2786// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) #0 { 2787// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 2788// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2789// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 2790// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2791// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2792// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2793// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 2794// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 2795// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 2796// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 2797// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 2798int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2799 return vqdmlsl_high_lane_s32(a, b, v, 0); 2800} 2801 2802// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { 2803// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2804// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2805// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2806// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2807// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2808// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 2809// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 2810// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 2811// CHECK: ret <4 x i32> [[TMP2]] 2812int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { 2813 return vqdmull_lane_s16(a, v, 0); 2814} 2815 2816// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { 2817// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2818// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2819// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2820// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2821// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2822// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 2823// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 2824// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 2825// CHECK: ret <2 x i64> [[TMP2]] 2826int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { 2827 return vqdmull_lane_s32(a, v, 0); 2828} 2829 2830// CHECK-LABEL: define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 { 2831// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2832// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2833// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2834// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2835// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2836// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 2837// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 2838// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 2839// CHECK: ret <4 x i32> [[TMP2]] 2840int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { 2841 return vqdmull_laneq_s16(a, v, 0); 2842} 2843 2844// CHECK-LABEL: define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 { 2845// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2846// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2847// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2848// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2849// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2850// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 2851// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 2852// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 2853// CHECK: ret <2 x i64> [[TMP2]] 2854int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { 2855 return vqdmull_laneq_s32(a, v, 0); 2856} 2857 2858// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { 2859// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2860// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2861// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2862// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2863// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2864// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2865// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 2866// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 2867// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 2868// CHECK: ret <4 x i32> [[TMP2]] 2869int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { 2870 return vqdmull_high_lane_s16(a, v, 0); 2871} 2872 2873// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { 2874// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 2875// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2876// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2877// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2878// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2879// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2880// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 2881// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 2882// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 2883// CHECK: ret <2 x i64> [[TMP2]] 2884int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { 2885 return vqdmull_high_lane_s32(a, v, 0); 2886} 2887 2888// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 { 2889// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2890// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 2891// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2892// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2893// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2894// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2895// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #2 2896// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 2897// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 2898// CHECK: ret <4 x i32> [[TMP2]] 2899int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { 2900 return vqdmull_high_laneq_s16(a, v, 0); 2901} 2902 2903// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 { 2904// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 2905// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 2906// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2907// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2908// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2909// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2910// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #2 2911// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 2912// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 2913// CHECK: ret <2 x i64> [[TMP2]] 2914int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { 2915 return vqdmull_high_laneq_s32(a, v, 0); 2916} 2917 2918// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { 2919// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2920// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2921// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2922// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2923// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2924// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2 2925// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 2926// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 2927// CHECK: ret <4 x i16> [[TMP2]] 2928int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { 2929 return vqdmulh_lane_s16(a, v, 0); 2930} 2931 2932// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { 2933// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 2934// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 2935// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 2936// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2937// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 2938// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2 2939// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 2940// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 2941// CHECK: ret <8 x i16> [[TMP2]] 2942int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { 2943 return vqdmulhq_lane_s16(a, v, 0); 2944} 2945 2946// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { 2947// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 2948// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2949// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 2950// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2951// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2952// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2 2953// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 2954// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 2955// CHECK: ret <2 x i32> [[TMP2]] 2956int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { 2957 return vqdmulh_lane_s32(a, v, 0); 2958} 2959 2960// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { 2961// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 2962// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2963// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 2964// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2965// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 2966// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2 2967// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 2968// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 2969// CHECK: ret <4 x i32> [[TMP2]] 2970int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { 2971 return vqdmulhq_lane_s32(a, v, 0); 2972} 2973 2974// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) #0 { 2975// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 2976// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2977// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 2978// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2979// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2980// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2 2981// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 2982// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> 2983// CHECK: ret <4 x i16> [[TMP2]] 2984int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { 2985 return vqrdmulh_lane_s16(a, v, 0); 2986} 2987 2988// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) #0 { 2989// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 2990// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 2991// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 2992// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2993// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 2994// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2 2995// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 2996// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> 2997// CHECK: ret <8 x i16> [[TMP2]] 2998int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { 2999 return vqrdmulhq_lane_s16(a, v, 0); 3000} 3001 3002// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) #0 { 3003// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 3004// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3005// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 3006// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3007// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3008// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2 3009// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 3010// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> 3011// CHECK: ret <2 x i32> [[TMP2]] 3012int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { 3013 return vqrdmulh_lane_s32(a, v, 0); 3014} 3015 3016// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) #0 { 3017// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 3018// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3019// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 3020// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3021// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3022// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2 3023// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 3024// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> 3025// CHECK: ret <4 x i32> [[TMP2]] 3026int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { 3027 return vqrdmulhq_lane_s32(a, v, 0); 3028} 3029 3030// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) #0 { 3031// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer 3032// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] 3033// CHECK: ret <2 x float> [[MUL]] 3034float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) { 3035 return vmul_lane_f32(a, v, 0); 3036} 3037 3038// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) #0 { 3039// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer 3040// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] 3041// CHECK: ret <4 x float> [[MUL]] 3042float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) { 3043 return vmulq_lane_f32(a, v, 0); 3044} 3045 3046// CHECK-LABEL: define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) #0 { 3047// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer 3048// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] 3049// CHECK: ret <2 x float> [[MUL]] 3050float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) { 3051 return vmul_laneq_f32(a, v, 0); 3052} 3053 3054// CHECK-LABEL: define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) #0 { 3055// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 3056// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8> 3057// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double 3058// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 3059// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 3060// CHECK: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] 3061// CHECK: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> 3062// CHECK: ret <1 x double> [[TMP5]] 3063float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) { 3064 return vmul_laneq_f64(a, v, 0); 3065} 3066 3067// CHECK-LABEL: define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) #0 { 3068// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer 3069// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] 3070// CHECK: ret <4 x float> [[MUL]] 3071float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) { 3072 return vmulq_laneq_f32(a, v, 0); 3073} 3074 3075// CHECK-LABEL: define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) #0 { 3076// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer 3077// CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]] 3078// CHECK: ret <2 x double> [[MUL]] 3079float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { 3080 return vmulq_laneq_f64(a, v, 0); 3081} 3082 3083// CHECK-LABEL: define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) #0 { 3084// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer 3085// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3086// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> 3087// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3088// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3089// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2 3090// CHECK: ret <2 x float> [[VMULX2_I]] 3091float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { 3092 return vmulx_lane_f32(a, v, 0); 3093} 3094 3095// CHECK-LABEL: define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) #0 { 3096// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer 3097// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3098// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> 3099// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3100// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3101// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2 3102// CHECK: ret <4 x float> [[VMULX2_I]] 3103float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { 3104 return vmulxq_lane_f32(a, v, 0); 3105} 3106 3107// CHECK-LABEL: define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) #0 { 3108// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer 3109// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 3110// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> 3111// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 3112// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 3113// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2 3114// CHECK: ret <2 x double> [[VMULX2_I]] 3115float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { 3116 return vmulxq_lane_f64(a, v, 0); 3117} 3118 3119// CHECK-LABEL: define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) #0 { 3120// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer 3121// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3122// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> 3123// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3124// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3125// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #2 3126// CHECK: ret <2 x float> [[VMULX2_I]] 3127float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { 3128 return vmulx_laneq_f32(a, v, 0); 3129} 3130 3131// CHECK-LABEL: define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) #0 { 3132// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer 3133// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3134// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> 3135// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3136// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3137// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #2 3138// CHECK: ret <4 x float> [[VMULX2_I]] 3139float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { 3140 return vmulxq_laneq_f32(a, v, 0); 3141} 3142 3143// CHECK-LABEL: define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) #0 { 3144// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer 3145// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 3146// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> 3147// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 3148// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 3149// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #2 3150// CHECK: ret <2 x double> [[VMULX2_I]] 3151float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { 3152 return vmulxq_laneq_f64(a, v, 0); 3153} 3154 3155// CHECK-LABEL: define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { 3156// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3157// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3158// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3159// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1 3160// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 3161// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 3162// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3163// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3164// CHECK: [[VMULL4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3165// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL4_I_I]]) #2 3166// CHECK: ret <4 x i32> [[VMULL5_I_I]] 3167int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) { 3168 return vmull_high_n_s16(a, b); 3169} 3170 3171// CHECK-LABEL: define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { 3172// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 3173// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3174// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3175// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 3176// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3177// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3178// CHECK: [[VMULL2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3179// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL2_I_I]]) #2 3180// CHECK: ret <2 x i64> [[VMULL3_I_I]] 3181int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { 3182 return vmull_high_n_s32(a, b); 3183} 3184 3185// CHECK-LABEL: define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 { 3186// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3187// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3188// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3189// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1 3190// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 3191// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 3192// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3193// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3194// CHECK: [[VMULL4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3195// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL4_I_I]]) #2 3196// CHECK: ret <4 x i32> [[VMULL5_I_I]] 3197uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) { 3198 return vmull_high_n_u16(a, b); 3199} 3200 3201// CHECK-LABEL: define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 { 3202// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 3203// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3204// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3205// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 3206// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3207// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3208// CHECK: [[VMULL2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3209// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL2_I_I]]) #2 3210// CHECK: ret <2 x i64> [[VMULL3_I_I]] 3211uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { 3212 return vmull_high_n_u32(a, b); 3213} 3214 3215// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 { 3216// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3217// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3218// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3219// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1 3220// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 3221// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 3222// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3223// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3224// CHECK: [[VQDMULL_V4_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3225// CHECK: [[VQDMULL_V5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V4_I_I]]) #2 3226// CHECK: [[VQDMULL_V6_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I_I]] to <16 x i8> 3227// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I_I]] to <4 x i32> 3228// CHECK: ret <4 x i32> [[TMP2]] 3229int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) { 3230 return vqdmull_high_n_s16(a, b); 3231} 3232 3233// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 { 3234// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 3235// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3236// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3237// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 3238// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3239// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3240// CHECK: [[VQDMULL_V2_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3241// CHECK: [[VQDMULL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V2_I_I]]) #2 3242// CHECK: [[VQDMULL_V4_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I_I]] to <16 x i8> 3243// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I_I]] to <2 x i64> 3244// CHECK: ret <2 x i64> [[TMP2]] 3245int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { 3246 return vqdmull_high_n_s32(a, b); 3247} 3248 3249// CHECK-LABEL: define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { 3250// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3251// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3252// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 3253// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 3254// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 3255// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3256// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3257// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3258// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3259// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 3260// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] 3261// CHECK: ret <4 x i32> [[ADD_I_I]] 3262int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 3263 return vmlal_high_n_s16(a, b, c); 3264} 3265 3266// CHECK-LABEL: define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { 3267// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 3268// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3269// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 3270// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3271// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3272// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3273// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3274// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2 3275// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] 3276// CHECK: ret <2 x i64> [[ADD_I_I]] 3277int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 3278 return vmlal_high_n_s32(a, b, c); 3279} 3280 3281// CHECK-LABEL: define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { 3282// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3283// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3284// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 3285// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 3286// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 3287// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3288// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3289// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3290// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3291// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 3292// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] 3293// CHECK: ret <4 x i32> [[ADD_I_I]] 3294uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { 3295 return vmlal_high_n_u16(a, b, c); 3296} 3297 3298// CHECK-LABEL: define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { 3299// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 3300// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3301// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 3302// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3303// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3304// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3305// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3306// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2 3307// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] 3308// CHECK: ret <2 x i64> [[ADD_I_I]] 3309uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { 3310 return vmlal_high_n_u32(a, b, c); 3311} 3312 3313// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { 3314// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3315// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3316// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3317// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3318// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 3319// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 3320// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 3321// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3322// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3323// CHECK: [[VQDMLAL4_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 3324// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL4_I_I]]) #2 3325// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3326// CHECK: [[VQDMLAL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL5_I_I]]) #2 3327// CHECK: ret <4 x i32> [[VQDMLAL_V6_I_I]] 3328int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 3329 return vqdmlal_high_n_s16(a, b, c); 3330} 3331 3332// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { 3333// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 3334// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3335// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3336// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3337// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 3338// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3339// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3340// CHECK: [[VQDMLAL2_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 3341// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL2_I_I]]) #2 3342// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3343// CHECK: [[VQDMLAL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL3_I_I]]) #2 3344// CHECK: ret <2 x i64> [[VQDMLAL_V4_I_I]] 3345int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 3346 return vqdmlal_high_n_s32(a, b, c); 3347} 3348 3349// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { 3350// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3351// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3352// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 3353// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 3354// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 3355// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3356// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3357// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3358// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3359// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 3360// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] 3361// CHECK: ret <4 x i32> [[SUB_I_I]] 3362int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 3363 return vmlsl_high_n_s16(a, b, c); 3364} 3365 3366// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { 3367// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 3368// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3369// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 3370// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3371// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3372// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3373// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3374// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2 3375// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] 3376// CHECK: ret <2 x i64> [[SUB_I_I]] 3377int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 3378 return vmlsl_high_n_s32(a, b, c); 3379} 3380 3381// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { 3382// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3383// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3384// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 3385// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 3386// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 3387// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3388// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3389// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3390// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3391// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #2 3392// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] 3393// CHECK: ret <4 x i32> [[SUB_I_I]] 3394uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { 3395 return vmlsl_high_n_u16(a, b, c); 3396} 3397 3398// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { 3399// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 3400// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3401// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 3402// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3403// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3404// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3405// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3406// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #2 3407// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] 3408// CHECK: ret <2 x i64> [[SUB_I_I]] 3409uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { 3410 return vmlsl_high_n_u32(a, b, c); 3411} 3412 3413// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 { 3414// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3415// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3416// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3417// CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3418// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 3419// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 3420// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 3421// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> 3422// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3423// CHECK: [[VQDMLAL4_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 3424// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL4_I_I]]) #2 3425// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3426// CHECK: [[VQDMLSL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL5_I_I]]) #2 3427// CHECK: ret <4 x i32> [[VQDMLSL_V6_I_I]] 3428int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 3429 return vqdmlsl_high_n_s16(a, b, c); 3430} 3431 3432// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 { 3433// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 3434// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3435// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3436// CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3437// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 3438// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> 3439// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3440// CHECK: [[VQDMLAL2_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 3441// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL2_I_I]]) #2 3442// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3443// CHECK: [[VQDMLSL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL3_I_I]]) #2 3444// CHECK: ret <2 x i64> [[VQDMLSL_V4_I_I]] 3445int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 3446 return vqdmlsl_high_n_s32(a, b, c); 3447} 3448 3449// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 { 3450// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0 3451// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1 3452// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]] 3453// CHECK: ret <2 x float> [[MUL_I]] 3454float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { 3455 return vmul_n_f32(a, b); 3456} 3457 3458// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 { 3459// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0 3460// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1 3461// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2 3462// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3 3463// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]] 3464// CHECK: ret <4 x float> [[MUL_I]] 3465float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { 3466 return vmulq_n_f32(a, b); 3467} 3468 3469// CHECK-LABEL: define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 { 3470// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %b, i32 0 3471// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %b, i32 1 3472// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %a, [[VECINIT1_I]] 3473// CHECK: ret <2 x double> [[MUL_I]] 3474float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) { 3475 return vmulq_n_f64(a, b); 3476} 3477 3478// CHECK-LABEL: define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 { 3479// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0 3480// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1 3481// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3482// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 3483// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> 3484// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3485// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3486// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 3487// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2 3488// CHECK: ret <2 x float> [[TMP6]] 3489float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) { 3490 return vfma_n_f32(a, b, n); 3491} 3492 3493// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { 3494// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 3495// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 3496// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2 3497// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3 3498// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3499// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 3500// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> 3501// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3502// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3503// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 3504// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2 3505// CHECK: ret <4 x float> [[TMP6]] 3506float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { 3507 return vfmaq_n_f32(a, b, n); 3508} 3509 3510// CHECK-LABEL: define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 { 3511// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b 3512// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0 3513// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1 3514// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3515// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> 3516// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> 3517// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3518// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3519// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 3520// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2 3521// CHECK: ret <2 x float> [[TMP6]] 3522float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) { 3523 return vfms_n_f32(a, b, n); 3524} 3525 3526// CHECK-LABEL: define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { 3527// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b 3528// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 3529// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 3530// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2 3531// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3 3532// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3533// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> 3534// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> 3535// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3536// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3537// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 3538// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2 3539// CHECK: ret <4 x float> [[TMP6]] 3540float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { 3541 return vfmsq_n_f32(a, b, n); 3542} 3543 3544// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 %b) #0 { 3545// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3546// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 3547// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 3548// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 3549// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]] 3550// CHECK: ret <4 x i16> [[MUL_I]] 3551int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) { 3552 return vmul_n_s16(a, b); 3553} 3554 3555// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 %b) #0 { 3556// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 3557// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 3558// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 3559// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 3560// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 3561// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 3562// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 3563// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 3564// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]] 3565// CHECK: ret <8 x i16> [[MUL_I]] 3566int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) { 3567 return vmulq_n_s16(a, b); 3568} 3569 3570// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 { 3571// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3572// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 3573// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] 3574// CHECK: ret <2 x i32> [[MUL_I]] 3575int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) { 3576 return vmul_n_s32(a, b); 3577} 3578 3579// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 { 3580// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 3581// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 3582// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 3583// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 3584// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]] 3585// CHECK: ret <4 x i32> [[MUL_I]] 3586int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) { 3587 return vmulq_n_s32(a, b); 3588} 3589 3590// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 %b) #0 { 3591// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3592// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 3593// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 3594// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 3595// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]] 3596// CHECK: ret <4 x i16> [[MUL_I]] 3597uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) { 3598 return vmul_n_u16(a, b); 3599} 3600 3601// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 %b) #0 { 3602// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 3603// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 3604// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 3605// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 3606// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 3607// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 3608// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 3609// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 3610// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]] 3611// CHECK: ret <8 x i16> [[MUL_I]] 3612uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) { 3613 return vmulq_n_u16(a, b); 3614} 3615 3616// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 { 3617// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3618// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 3619// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] 3620// CHECK: ret <2 x i32> [[MUL_I]] 3621uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) { 3622 return vmul_n_u32(a, b); 3623} 3624 3625// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 { 3626// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 3627// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 3628// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 3629// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 3630// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]] 3631// CHECK: ret <4 x i32> [[MUL_I]] 3632uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { 3633 return vmulq_n_u32(a, b); 3634} 3635 3636// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 %b) #0 { 3637// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3638// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3639// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 3640// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 3641// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 3642// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3643// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3644// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3645// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #2 3646// CHECK: ret <4 x i32> [[VMULL5_I]] 3647int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { 3648 return vmull_n_s16(a, b); 3649} 3650 3651// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 { 3652// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3653// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3654// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 3655// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3656// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3657// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3658// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #2 3659// CHECK: ret <2 x i64> [[VMULL3_I]] 3660int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { 3661 return vmull_n_s32(a, b); 3662} 3663 3664// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 %b) #0 { 3665// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3666// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3667// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 3668// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 3669// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 3670// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3671// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3672// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3673// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #2 3674// CHECK: ret <4 x i32> [[VMULL5_I]] 3675uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { 3676 return vmull_n_u16(a, b); 3677} 3678 3679// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 { 3680// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3681// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3682// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 3683// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3684// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3685// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3686// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #2 3687// CHECK: ret <2 x i64> [[VMULL3_I]] 3688uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { 3689 return vmull_n_u32(a, b); 3690} 3691 3692// CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 %b) #0 { 3693// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3694// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3695// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 3696// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 3697// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 3698// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3699// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3700// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3701// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #2 3702// CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8> 3703// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32> 3704// CHECK: ret <4 x i32> [[TMP2]] 3705int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { 3706 return vqdmull_n_s16(a, b); 3707} 3708 3709// CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 { 3710// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3711// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3712// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 3713// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3714// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3715// CHECK: [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3716// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #2 3717// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8> 3718// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64> 3719// CHECK: ret <2 x i64> [[TMP2]] 3720int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { 3721 return vqdmull_n_s32(a, b); 3722} 3723 3724// CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 %b) #0 { 3725// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3726// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3727// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 3728// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 3729// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 3730// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3731// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3732// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3733// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #2 3734// CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8> 3735// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16> 3736// CHECK: ret <4 x i16> [[TMP2]] 3737int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { 3738 return vqdmulh_n_s16(a, b); 3739} 3740 3741// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 { 3742// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3743// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 3744// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 3745// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 3746// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 3747// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 3748// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 3749// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 3750// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 3751// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> 3752// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3753// CHECK: [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3754// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #2 3755// CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8> 3756// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16> 3757// CHECK: ret <8 x i16> [[TMP2]] 3758int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { 3759 return vqdmulhq_n_s16(a, b); 3760} 3761 3762// CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 { 3763// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3764// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3765// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 3766// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3767// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3768// CHECK: [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3769// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #2 3770// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8> 3771// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32> 3772// CHECK: ret <2 x i32> [[TMP2]] 3773int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { 3774 return vqdmulh_n_s32(a, b); 3775} 3776 3777// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 { 3778// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3779// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 3780// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 3781// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 3782// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 3783// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> 3784// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3785// CHECK: [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3786// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #2 3787// CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8> 3788// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32> 3789// CHECK: ret <4 x i32> [[TMP2]] 3790int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { 3791 return vqdmulhq_n_s32(a, b); 3792} 3793 3794// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 %b) #0 { 3795// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3796// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 3797// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 3798// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 3799// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 3800// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3801// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3802// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3803// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #2 3804// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8> 3805// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16> 3806// CHECK: ret <4 x i16> [[TMP2]] 3807int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { 3808 return vqrdmulh_n_s16(a, b); 3809} 3810 3811// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 { 3812// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3813// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 3814// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 3815// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 3816// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 3817// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 3818// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 3819// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 3820// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 3821// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> 3822// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3823// CHECK: [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3824// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #2 3825// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8> 3826// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16> 3827// CHECK: ret <8 x i16> [[TMP2]] 3828int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { 3829 return vqrdmulhq_n_s16(a, b); 3830} 3831 3832// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 { 3833// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3834// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 3835// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 3836// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3837// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3838// CHECK: [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3839// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #2 3840// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8> 3841// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32> 3842// CHECK: ret <2 x i32> [[TMP2]] 3843int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { 3844 return vqrdmulh_n_s32(a, b); 3845} 3846 3847// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 { 3848// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3849// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 3850// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 3851// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 3852// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 3853// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> 3854// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3855// CHECK: [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3856// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #2 3857// CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8> 3858// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32> 3859// CHECK: ret <4 x i32> [[TMP2]] 3860int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { 3861 return vqrdmulhq_n_s32(a, b); 3862} 3863 3864// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 { 3865// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3866// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 3867// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 3868// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 3869// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] 3870// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]] 3871// CHECK: ret <4 x i16> [[ADD_I]] 3872int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) { 3873 return vmla_n_s16(a, b, c); 3874} 3875 3876// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 { 3877// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 3878// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 3879// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 3880// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 3881// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 3882// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 3883// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 3884// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 3885// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] 3886// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]] 3887// CHECK: ret <8 x i16> [[ADD_I]] 3888int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { 3889 return vmlaq_n_s16(a, b, c); 3890} 3891 3892// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { 3893// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3894// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 3895// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] 3896// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]] 3897// CHECK: ret <2 x i32> [[ADD_I]] 3898int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) { 3899 return vmla_n_s32(a, b, c); 3900} 3901 3902// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { 3903// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 3904// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 3905// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 3906// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 3907// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] 3908// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]] 3909// CHECK: ret <4 x i32> [[ADD_I]] 3910int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { 3911 return vmlaq_n_s32(a, b, c); 3912} 3913 3914// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 { 3915// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3916// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 3917// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 3918// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 3919// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] 3920// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]] 3921// CHECK: ret <4 x i16> [[ADD_I]] 3922uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { 3923 return vmla_n_u16(a, b, c); 3924} 3925 3926// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 { 3927// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 3928// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 3929// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 3930// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 3931// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 3932// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 3933// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 3934// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 3935// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] 3936// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]] 3937// CHECK: ret <8 x i16> [[ADD_I]] 3938uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { 3939 return vmlaq_n_u16(a, b, c); 3940} 3941 3942// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { 3943// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3944// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 3945// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] 3946// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]] 3947// CHECK: ret <2 x i32> [[ADD_I]] 3948uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { 3949 return vmla_n_u32(a, b, c); 3950} 3951 3952// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { 3953// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 3954// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 3955// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 3956// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 3957// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] 3958// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]] 3959// CHECK: ret <4 x i32> [[ADD_I]] 3960uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { 3961 return vmlaq_n_u32(a, b, c); 3962} 3963 3964// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 { 3965// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3966// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 3967// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 3968// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 3969// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3970// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3971// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3972// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3973// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2 3974// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] 3975// CHECK: ret <4 x i32> [[ADD_I]] 3976int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 3977 return vmlal_n_s16(a, b, c); 3978} 3979 3980// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 3981// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 3982// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 3983// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3984// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3985// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3986// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3987// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2 3988// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] 3989// CHECK: ret <2 x i64> [[ADD_I]] 3990int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 3991 return vmlal_n_s32(a, b, c); 3992} 3993 3994// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 { 3995// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 3996// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 3997// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 3998// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 3999// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4000// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4001// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4002// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4003// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2 4004// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] 4005// CHECK: ret <4 x i32> [[ADD_I]] 4006uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { 4007 return vmlal_n_u16(a, b, c); 4008} 4009 4010// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 4011// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 4012// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 4013// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4014// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4015// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4016// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4017// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2 4018// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] 4019// CHECK: ret <2 x i64> [[ADD_I]] 4020uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { 4021 return vmlal_n_u32(a, b, c); 4022} 4023 4024// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 { 4025// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4026// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4027// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 4028// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 4029// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 4030// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 4031// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4032// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4033// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4034// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #2 4035// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4036// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #2 4037// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] 4038int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 4039 return vqdmlal_n_s16(a, b, c); 4040} 4041 4042// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 4043// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4044// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4045// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 4046// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 4047// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4048// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4049// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4050// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #2 4051// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4052// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #2 4053// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] 4054int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 4055 return vqdmlal_n_s32(a, b, c); 4056} 4057 4058// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 { 4059// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 4060// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 4061// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 4062// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 4063// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] 4064// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] 4065// CHECK: ret <4 x i16> [[SUB_I]] 4066int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) { 4067 return vmls_n_s16(a, b, c); 4068} 4069 4070// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 { 4071// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 4072// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 4073// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 4074// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 4075// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 4076// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 4077// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 4078// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 4079// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] 4080// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] 4081// CHECK: ret <8 x i16> [[SUB_I]] 4082int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { 4083 return vmlsq_n_s16(a, b, c); 4084} 4085 4086// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { 4087// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 4088// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 4089// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] 4090// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] 4091// CHECK: ret <2 x i32> [[SUB_I]] 4092int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) { 4093 return vmls_n_s32(a, b, c); 4094} 4095 4096// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { 4097// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 4098// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 4099// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 4100// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 4101// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] 4102// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] 4103// CHECK: ret <4 x i32> [[SUB_I]] 4104int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { 4105 return vmlsq_n_s32(a, b, c); 4106} 4107 4108// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 { 4109// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 4110// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 4111// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 4112// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 4113// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] 4114// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] 4115// CHECK: ret <4 x i16> [[SUB_I]] 4116uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { 4117 return vmls_n_u16(a, b, c); 4118} 4119 4120// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 { 4121// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 4122// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 4123// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 4124// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 4125// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 4126// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 4127// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 4128// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 4129// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] 4130// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] 4131// CHECK: ret <8 x i16> [[SUB_I]] 4132uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { 4133 return vmlsq_n_u16(a, b, c); 4134} 4135 4136// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { 4137// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 4138// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 4139// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] 4140// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] 4141// CHECK: ret <2 x i32> [[SUB_I]] 4142uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { 4143 return vmls_n_u32(a, b, c); 4144} 4145 4146// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { 4147// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 4148// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 4149// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 4150// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 4151// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] 4152// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] 4153// CHECK: ret <4 x i32> [[SUB_I]] 4154uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { 4155 return vmlsq_n_u32(a, b, c); 4156} 4157 4158// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 { 4159// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 4160// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 4161// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 4162// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 4163// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4164// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4165// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4166// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4167// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2 4168// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] 4169// CHECK: ret <4 x i32> [[SUB_I]] 4170int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 4171 return vmlsl_n_s16(a, b, c); 4172} 4173 4174// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 4175// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 4176// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 4177// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4178// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4179// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4180// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4181// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2 4182// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] 4183// CHECK: ret <2 x i64> [[SUB_I]] 4184int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 4185 return vmlsl_n_s32(a, b, c); 4186} 4187 4188// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 { 4189// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 4190// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 4191// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 4192// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 4193// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4194// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4195// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4196// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4197// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #2 4198// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] 4199// CHECK: ret <4 x i32> [[SUB_I]] 4200uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { 4201 return vmlsl_n_u16(a, b, c); 4202} 4203 4204// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 4205// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 4206// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 4207// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4208// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4209// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4210// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4211// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #2 4212// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] 4213// CHECK: ret <2 x i64> [[SUB_I]] 4214uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { 4215 return vmlsl_n_u32(a, b, c); 4216} 4217 4218// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 { 4219// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4220// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4221// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 4222// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 4223// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 4224// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 4225// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4226// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4227// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4228// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #2 4229// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4230// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #2 4231// CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] 4232int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 4233 return vqdmlsl_n_s16(a, b, c); 4234} 4235 4236// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 4237// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4238// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4239// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 4240// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 4241// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4242// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4243// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4244// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #2 4245// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4246// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #2 4247// CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] 4248int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 4249 return vqdmlsl_n_s32(a, b, c); 4250} 4251 4252// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 4253// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 4254// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4255// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 4256// CHECK: ret <4 x i16> [[ADD]] 4257uint16x4_t test_vmla_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 4258 return vmla_lane_u16(a, b, v, 0); 4259} 4260 4261// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 4262// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 4263// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4264// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 4265// CHECK: ret <8 x i16> [[ADD]] 4266uint16x8_t test_vmlaq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 4267 return vmlaq_lane_u16(a, b, v, 0); 4268} 4269 4270// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 4271// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 4272// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4273// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 4274// CHECK: ret <2 x i32> [[ADD]] 4275uint32x2_t test_vmla_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 4276 return vmla_lane_u32(a, b, v, 0); 4277} 4278 4279// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 4280// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 4281// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4282// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 4283// CHECK: ret <4 x i32> [[ADD]] 4284uint32x4_t test_vmlaq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 4285 return vmlaq_lane_u32(a, b, v, 0); 4286} 4287 4288// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_u16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4289// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4290// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4291// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 4292// CHECK: ret <4 x i16> [[ADD]] 4293uint16x4_t test_vmla_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 4294 return vmla_laneq_u16(a, b, v, 0); 4295} 4296 4297// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_u16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4298// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 4299// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4300// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 4301// CHECK: ret <8 x i16> [[ADD]] 4302uint16x8_t test_vmlaq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 4303 return vmlaq_laneq_u16(a, b, v, 0); 4304} 4305 4306// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_u32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4307// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4308// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4309// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 4310// CHECK: ret <2 x i32> [[ADD]] 4311uint32x2_t test_vmla_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 4312 return vmla_laneq_u32(a, b, v, 0); 4313} 4314 4315// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_u32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4316// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 4317// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4318// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 4319// CHECK: ret <4 x i32> [[ADD]] 4320uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 4321 return vmlaq_laneq_u32(a, b, v, 0); 4322} 4323 4324// CHECK-LABEL: define <4 x i32> @test_vqdmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4325// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4326// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4327// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4328// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4329// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4330// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4331// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4332// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4333// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4334// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 4335int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 4336 return vqdmlal_laneq_s16(a, b, v, 0); 4337} 4338 4339// CHECK-LABEL: define <2 x i64> @test_vqdmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4340// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4341// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4342// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4343// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4344// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4345// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4346// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4347// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4348// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4349// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 4350int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 4351 return vqdmlal_laneq_s32(a, b, v, 0); 4352} 4353 4354// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4355// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 4356// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4357// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4358// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 4359// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4360// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4361// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4362// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4363// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4364// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4365// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 4366int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 4367 return vqdmlal_high_laneq_s16(a, b, v, 0); 4368} 4369 4370// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4371// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 4372// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4373// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4374// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 4375// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4376// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4377// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4378// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4379// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4380// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4381// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 4382int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 4383 return vqdmlal_high_laneq_s32(a, b, v, 0); 4384} 4385 4386// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 4387// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer 4388// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4389// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 4390// CHECK: ret <4 x i16> [[SUB]] 4391uint16x4_t test_vmls_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 4392 return vmls_lane_u16(a, b, v, 0); 4393} 4394 4395// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 4396// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer 4397// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4398// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 4399// CHECK: ret <8 x i16> [[SUB]] 4400uint16x8_t test_vmlsq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 4401 return vmlsq_lane_u16(a, b, v, 0); 4402} 4403 4404// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 4405// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer 4406// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4407// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 4408// CHECK: ret <2 x i32> [[SUB]] 4409uint32x2_t test_vmls_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 4410 return vmls_lane_u32(a, b, v, 0); 4411} 4412 4413// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 4414// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer 4415// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4416// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 4417// CHECK: ret <4 x i32> [[SUB]] 4418uint32x4_t test_vmlsq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 4419 return vmlsq_lane_u32(a, b, v, 0); 4420} 4421 4422// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_u16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4423// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4424// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4425// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 4426// CHECK: ret <4 x i16> [[SUB]] 4427uint16x4_t test_vmls_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 4428 return vmls_laneq_u16(a, b, v, 0); 4429} 4430 4431// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_u16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4432// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 4433// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4434// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 4435// CHECK: ret <8 x i16> [[SUB]] 4436uint16x8_t test_vmlsq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 4437 return vmlsq_laneq_u16(a, b, v, 0); 4438} 4439 4440// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_u32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4441// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4442// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4443// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 4444// CHECK: ret <2 x i32> [[SUB]] 4445uint32x2_t test_vmls_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 4446 return vmls_laneq_u32(a, b, v, 0); 4447} 4448 4449// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_u32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4450// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 4451// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4452// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 4453// CHECK: ret <4 x i32> [[SUB]] 4454uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 4455 return vmlsq_laneq_u32(a, b, v, 0); 4456} 4457 4458// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4459// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4460// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4461// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4462// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4463// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4464// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4465// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4466// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4467// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4468// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 4469int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 4470 return vqdmlsl_laneq_s16(a, b, v, 0); 4471} 4472 4473// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4474// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4475// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4476// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4477// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4478// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4479// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4480// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4481// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4482// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4483// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 4484int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 4485 return vqdmlsl_laneq_s32(a, b, v, 0); 4486} 4487 4488// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4489// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 4490// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4491// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4492// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 4493// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4494// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4495// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4496// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4497// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4498// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4499// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 4500int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 4501 return vqdmlsl_high_laneq_s16(a, b, v, 0); 4502} 4503 4504// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4505// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 4506// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4507// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4508// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 4509// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4510// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4511// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4512// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4513// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4514// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4515// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 4516int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 4517 return vqdmlsl_high_laneq_s32(a, b, v, 0); 4518} 4519 4520// CHECK-LABEL: define <4 x i16> @test_vqdmulh_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 { 4521// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4522// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4523// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4524// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4525// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4526// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2 4527// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 4528// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 4529// CHECK: ret <4 x i16> [[TMP2]] 4530int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { 4531 return vqdmulh_laneq_s16(a, v, 0); 4532} 4533 4534// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 { 4535// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 4536// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4537// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 4538// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4539// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4540// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2 4541// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 4542// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 4543// CHECK: ret <8 x i16> [[TMP2]] 4544int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { 4545 return vqdmulhq_laneq_s16(a, v, 0); 4546} 4547 4548// CHECK-LABEL: define <2 x i32> @test_vqdmulh_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 { 4549// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4550// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4551// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4552// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4553// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4554// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2 4555// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 4556// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 4557// CHECK: ret <2 x i32> [[TMP2]] 4558int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { 4559 return vqdmulh_laneq_s32(a, v, 0); 4560} 4561 4562// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 { 4563// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 4564// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4565// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 4566// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4567// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4568// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2 4569// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 4570// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 4571// CHECK: ret <4 x i32> [[TMP2]] 4572int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { 4573 return vqdmulhq_laneq_s32(a, v, 0); 4574} 4575 4576// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) #0 { 4577// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer 4578// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4579// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4580// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4581// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4582// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2 4583// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 4584// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> 4585// CHECK: ret <4 x i16> [[TMP2]] 4586int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { 4587 return vqrdmulh_laneq_s16(a, v, 0); 4588} 4589 4590// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) #0 { 4591// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer 4592// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4593// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 4594// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4595// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4596// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2 4597// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 4598// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> 4599// CHECK: ret <8 x i16> [[TMP2]] 4600int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { 4601 return vqrdmulhq_laneq_s16(a, v, 0); 4602} 4603 4604// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) #0 { 4605// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer 4606// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4607// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4608// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4609// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4610// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2 4611// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 4612// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> 4613// CHECK: ret <2 x i32> [[TMP2]] 4614int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { 4615 return vqrdmulh_laneq_s32(a, v, 0); 4616} 4617 4618// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) #0 { 4619// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer 4620// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4621// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 4622// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4623// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4624// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2 4625// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 4626// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> 4627// CHECK: ret <4 x i32> [[TMP2]] 4628int32x4_t test_vqrdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { 4629 return vqrdmulhq_laneq_s32(a, v, 0); 4630} 4631 4632// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 4633// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4634// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4635// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 4636// CHECK: ret <4 x i16> [[ADD]] 4637uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 4638 return vmla_lane_u16(a, b, v, 3); 4639} 4640 4641// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 4642// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 4643// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4644// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 4645// CHECK: ret <8 x i16> [[ADD]] 4646uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 4647 return vmlaq_lane_u16(a, b, v, 3); 4648} 4649 4650// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 4651// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 4652// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4653// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 4654// CHECK: ret <2 x i32> [[ADD]] 4655uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 4656 return vmla_lane_u32(a, b, v, 1); 4657} 4658 4659// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 4660// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4661// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4662// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 4663// CHECK: ret <4 x i32> [[ADD]] 4664uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 4665 return vmlaq_lane_u32(a, b, v, 1); 4666} 4667 4668// CHECK-LABEL: define <4 x i16> @test_vmla_laneq_u16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4669// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4670// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4671// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] 4672// CHECK: ret <4 x i16> [[ADD]] 4673uint16x4_t test_vmla_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 4674 return vmla_laneq_u16(a, b, v, 7); 4675} 4676 4677// CHECK-LABEL: define <8 x i16> @test_vmlaq_laneq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4678// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 4679// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4680// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] 4681// CHECK: ret <8 x i16> [[ADD]] 4682uint16x8_t test_vmlaq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 4683 return vmlaq_laneq_u16(a, b, v, 7); 4684} 4685 4686// CHECK-LABEL: define <2 x i32> @test_vmla_laneq_u32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4687// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4688// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4689// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] 4690// CHECK: ret <2 x i32> [[ADD]] 4691uint32x2_t test_vmla_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 4692 return vmla_laneq_u32(a, b, v, 3); 4693} 4694 4695// CHECK-LABEL: define <4 x i32> @test_vmlaq_laneq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4696// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4697// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4698// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] 4699// CHECK: ret <4 x i32> [[ADD]] 4700uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 4701 return vmlaq_laneq_u32(a, b, v, 3); 4702} 4703 4704// CHECK-LABEL: define <4 x i32> @test_vqdmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4705// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4706// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4707// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4708// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4709// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4710// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4711// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4712// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4713// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4714// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 4715int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 4716 return vqdmlal_laneq_s16(a, b, v, 7); 4717} 4718 4719// CHECK-LABEL: define <2 x i64> @test_vqdmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4720// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4721// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4722// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4723// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4724// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4725// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4726// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4727// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4728// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4729// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 4730int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 4731 return vqdmlal_laneq_s32(a, b, v, 3); 4732} 4733 4734// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4735// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 4736// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4737// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4738// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 4739// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4740// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4741// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4742// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4743// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4744// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4745// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 4746int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 4747 return vqdmlal_high_laneq_s16(a, b, v, 7); 4748} 4749 4750// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4751// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 4752// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4753// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4754// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 4755// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4756// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4757// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4758// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4759// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4760// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4761// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 4762int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 4763 return vqdmlal_high_laneq_s32(a, b, v, 3); 4764} 4765 4766// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) #0 { 4767// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4768// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4769// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 4770// CHECK: ret <4 x i16> [[SUB]] 4771uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 4772 return vmls_lane_u16(a, b, v, 3); 4773} 4774 4775// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) #0 { 4776// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 4777// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4778// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 4779// CHECK: ret <8 x i16> [[SUB]] 4780uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 4781 return vmlsq_lane_u16(a, b, v, 3); 4782} 4783 4784// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) #0 { 4785// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> 4786// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4787// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 4788// CHECK: ret <2 x i32> [[SUB]] 4789uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 4790 return vmls_lane_u32(a, b, v, 1); 4791} 4792 4793// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) #0 { 4794// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4795// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4796// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 4797// CHECK: ret <4 x i32> [[SUB]] 4798uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 4799 return vmlsq_lane_u32(a, b, v, 1); 4800} 4801 4802// CHECK-LABEL: define <4 x i16> @test_vmls_laneq_u16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4803// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4804// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] 4805// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] 4806// CHECK: ret <4 x i16> [[SUB]] 4807uint16x4_t test_vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 4808 return vmls_laneq_u16(a, b, v, 7); 4809} 4810 4811// CHECK-LABEL: define <8 x i16> @test_vmlsq_laneq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4812// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 4813// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] 4814// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] 4815// CHECK: ret <8 x i16> [[SUB]] 4816uint16x8_t test_vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 4817 return vmlsq_laneq_u16(a, b, v, 7); 4818} 4819 4820// CHECK-LABEL: define <2 x i32> @test_vmls_laneq_u32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4821// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4822// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] 4823// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] 4824// CHECK: ret <2 x i32> [[SUB]] 4825uint32x2_t test_vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 4826 return vmls_laneq_u32(a, b, v, 3); 4827} 4828 4829// CHECK-LABEL: define <4 x i32> @test_vmlsq_laneq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4830// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4831// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] 4832// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] 4833// CHECK: ret <4 x i32> [[SUB]] 4834uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 4835 return vmlsq_laneq_u32(a, b, v, 3); 4836} 4837 4838// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) #0 { 4839// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4840// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4841// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4842// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4843// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4844// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4845// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4846// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4847// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4848// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 4849int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 4850 return vqdmlsl_laneq_s16(a, b, v, 7); 4851} 4852 4853// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) #0 { 4854// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4855// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4856// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4857// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4858// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4859// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4860// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4861// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4862// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4863// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 4864int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 4865 return vqdmlsl_laneq_s32(a, b, v, 3); 4866} 4867 4868// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) #0 { 4869// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 4870// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4871// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4872// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 4873// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4874// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4875// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 4876// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #2 4877// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4878// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #2 4879// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 4880int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 4881 return vqdmlsl_high_laneq_s16(a, b, v, 7); 4882} 4883 4884// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) #0 { 4885// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 4886// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4887// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4888// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 4889// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4890// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4891// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 4892// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #2 4893// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4894// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #2 4895// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 4896int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 4897 return vqdmlsl_high_laneq_s32(a, b, v, 3); 4898} 4899 4900// CHECK-LABEL: define <4 x i16> @test_vqdmulh_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { 4901// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4902// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4903// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4904// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4905// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4906// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #2 4907// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 4908// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 4909// CHECK: ret <4 x i16> [[TMP2]] 4910int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) { 4911 return vqdmulh_laneq_s16(a, v, 7); 4912} 4913 4914// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { 4915// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 4916// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4917// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 4918// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4919// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4920// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #2 4921// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 4922// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 4923// CHECK: ret <8 x i16> [[TMP2]] 4924int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { 4925 return vqdmulhq_laneq_s16(a, v, 7); 4926} 4927 4928// CHECK-LABEL: define <2 x i32> @test_vqdmulh_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 { 4929// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4930// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4931// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4932// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4933// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4934// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #2 4935// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 4936// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 4937// CHECK: ret <2 x i32> [[TMP2]] 4938int32x2_t test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) { 4939 return vqdmulh_laneq_s32(a, v, 3); 4940} 4941 4942// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { 4943// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4944// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4945// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 4946// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4947// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4948// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #2 4949// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 4950// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 4951// CHECK: ret <4 x i32> [[TMP2]] 4952int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { 4953 return vqdmulhq_laneq_s32(a, v, 3); 4954} 4955 4956// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_laneq_s16(<4 x i16> %a, <8 x i16> %v) #0 { 4957// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 4958// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4959// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 4960// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4961// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4962// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #2 4963// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 4964// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> 4965// CHECK: ret <4 x i16> [[TMP2]] 4966int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) { 4967 return vqrdmulh_laneq_s16(a, v, 7); 4968} 4969 4970// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_laneq_s16(<8 x i16> %a, <8 x i16> %v) #0 { 4971// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 4972// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4973// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 4974// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4975// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4976// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #2 4977// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 4978// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> 4979// CHECK: ret <8 x i16> [[TMP2]] 4980int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { 4981 return vqrdmulhq_laneq_s16(a, v, 7); 4982} 4983 4984// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_laneq_s32(<2 x i32> %a, <4 x i32> %v) #0 { 4985// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> 4986// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4987// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 4988// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4989// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4990// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #2 4991// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 4992// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> 4993// CHECK: ret <2 x i32> [[TMP2]] 4994int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) { 4995 return vqrdmulh_laneq_s32(a, v, 3); 4996} 4997 4998// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_laneq_s32(<4 x i32> %a, <4 x i32> %v) #0 { 4999// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 5000// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5001// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 5002// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5003// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5004// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #2 5005// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 5006// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> 5007// CHECK: ret <4 x i32> [[TMP2]] 5008int32x4_t test_vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { 5009 return vqrdmulhq_laneq_s32(a, v, 3); 5010} 5011 5012