// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

// Test new aarch64 intrinsics and types
//
// How this test works: the command on the first line compiles this file for
// arm64 with the NEON feature enabled, emits textual LLVM IR, runs it through
// opt's mem2reg pass, and feeds the result to FileCheck. Every function below
// wraps exactly one intrinsic from <arm_neon.h>; the pattern comments directly
// above a function describe the IR expected for it. Those pattern comments are
// test input, not documentation: editing them changes what the test verifies.
// The parameter names (a, b, c, v) appear literally in the expected IR, so
// they must not be renamed.

#include <arm_neon.h>

// ---------------------------------------------------------------------------
// Multiply-accumulate / multiply-subtract by a scalar (the _n_ forms).
// Expected IR: the scalar %c is inserted into every lane of an undef vector,
// that vector is multiplied with %b (fmul), and the product is added to
// (vmla*) or subtracted from (vmls*) %a as a separate fadd/fsub instruction.
// ---------------------------------------------------------------------------

// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}

// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]]
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vmlaq_n_f64(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}

// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]]
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vmlsq_n_f64(a, b, c);
}

// ---------------------------------------------------------------------------
// Multiply-accumulate / multiply-subtract by lane 0 of v.
// Expected IR: lane 0 of %v is broadcast with a shufflevector whose mask is
// zeroinitializer, then fmul with %b and fadd/fsub with %a. The _lane_ forms
// take a 2-element v, the _laneq_ forms a 4-element v; the shuffle result has
// as many elements as a and b.
// ---------------------------------------------------------------------------

// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmla_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlaq_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmla_laneq_f32(a, b, v, 0);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlaq_laneq_f32(a, b, v, 0);
}

// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmls_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlsq_lane_f32(a, b, v, 0);
}

// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmls_laneq_f32(a, b, v, 0);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlsq_laneq_f32(a, b, v, 0);
}

// ---------------------------------------------------------------------------
// Multiply-accumulate / multiply-subtract by a non-zero lane of v.
// Same shape as the lane-0 group, but the lane index passed here is 1 for the
// 2-element v (_lane_) and 3 for the 4-element v (_laneq_), so the expected
// shuffle mask is an explicit splat of that index instead of zeroinitializer.
// ---------------------------------------------------------------------------

// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmla_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlaq_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmla_laneq_f32(a, b, v, 3);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlaq_laneq_f32(a, b, v, 3);
}

// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vmls_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vmlsq_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vmls_laneq_f32(a, b, v, 3);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vmlsq_laneq_f32(a, b, v, 3);
}

// ---------------------------------------------------------------------------
// Fused multiply-add / multiply-subtract by a scalar (vfmaq_n / vfmsq_n).
// Unlike the vmla/vmls groups above, the expected IR is a single call to
// @llvm.fma.v2f64 with operands (b, splat(c), a). Each operand is expected to
// pass through a pair of bitcasts to and from <16 x i8> before the call.
// For vfmsq_n the first operand is the negation of %b, written in the
// expected IR as an fsub of %b from a vector of -0.0.
// ---------------------------------------------------------------------------

// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmaq_n_f64(a, b, c);
}

// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
  return vfmsq_n_f64(a, b, c);
}