16bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s 2651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines// Test ARM64 SIMD fused multiply add intrinsics 3651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 4651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines#include <arm_neon.h> 5651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 6651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x2_t test_vfma_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) { 7651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfma_f32 8651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfma_f32(a1, a2, a3); 9651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f32({{.*a2, .*a3, .*a1}}) 10651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 11651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 12651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 13651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x4_t test_vfmaq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) { 14651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmaq_f32 15651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmaq_f32(a1, a2, a3); 16651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v4f32({{.*a2, .*a3, .*a1}}) 17651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 18651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 19651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 20651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat64x2_t test_vfmaq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) { 21651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmaq_f64 22651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmaq_f64(a1, a2, a3); 23651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f64({{.*a2, .*a3, .*a1}}) 24651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 25651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 26651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 27651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x2_t test_vfma_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) { 28651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfma_lane_f32 29651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfma_lane_f32(a1, a2, a3, 1); 30651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 31651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 1 (usually a shufflevector) 32651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f32(<2 x float> %a2, <2 x float> {{.*}}, <2 x float> %a1) 33651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 34651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 35651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 36651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x4_t test_vfmaq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) { 37651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmaq_lane_f32 38651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmaq_lane_f32(a1, a2, a3, 1); 39651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 40651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 1 (usually a shufflevector) 41651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v4f32(<4 x float> %a2, <4 x float> {{.*}}, <4 x float> %a1) 42651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 43651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 44651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 45651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat64x2_t test_vfmaq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) { 46651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmaq_lane_f64 47651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmaq_lane_f64(a1, a2, a3, 0); 48651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 49651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 1 (usually a shufflevector) 50651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f64(<2 x double> %a2, <2 x double> {{.*}}, <2 x double> %a1) 51651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 52651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 53651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 54651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x2_t test_vfma_n_f32(float32x2_t a1, float32x2_t a2, float32_t a3) { 55651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfma_n_f32 56651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfma_n_f32(a1, a2, a3); 57651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 58651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 0 (usually two insertelements) 59651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f32 60651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 61651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 62651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 63651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x4_t test_vfmaq_n_f32(float32x4_t a1, float32x4_t a2, float32_t a3) { 64651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmaq_n_f32 65651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmaq_n_f32(a1, a2, a3); 66651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 67651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 0 (usually four insertelements) 68651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v4f32 69651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 70651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 71651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 72651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat64x2_t test_vfmaq_n_f64(float64x2_t a1, float64x2_t a2, float64_t a3) { 73651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmaq_n_f64 74651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmaq_n_f64(a1, a2, a3); 75651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 76651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 0 (usually two insertelements) 77651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f64 78651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 79651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 80651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 81651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x2_t test_vfms_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) { 82651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfms_f32 83651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfms_f32(a1, a2, a3); 84651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a2 85651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f32(<2 x float> %a3, <2 x float> [[NEG]], <2 x float> %a1) 86651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 87651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 88651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 89651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x4_t test_vfmsq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) { 90651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmsq_f32 91651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmsq_f32(a1, a2, a3); 92651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[NEG:%.*]] = fsub <4 x float> {{.*}}, %a2 93651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v4f32(<4 x float> %a3, <4 x float> [[NEG]], <4 x float> %a1) 94651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 95651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 96651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 97651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat64x2_t test_vfmsq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) { 98651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmsq_f64 99651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmsq_f64(a1, a2, a3); 100651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[NEG:%.*]] = fsub <2 x double> {{.*}}, %a2 101651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f64(<2 x double> %a3, <2 x double> [[NEG]], <2 x double> %a1) 102651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 103651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 104651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 105651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x2_t test_vfms_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) { 106651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfms_lane_f32 107651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfms_lane_f32(a1, a2, a3, 1); 108651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 109651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 1 (usually a shufflevector) 110651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3 111651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]] 112651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f32(<2 x float> {{.*}}, <2 x float> [[LANE]], <2 x float> %a1) 113651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 114651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 115651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 116651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat32x4_t test_vfmsq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) { 117651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmsq_lane_f32 118651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmsq_lane_f32(a1, a2, a3, 1); 119651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 120651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 1 (usually a shufflevector) 121651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3 122651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]] 123651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v4f32(<4 x float> {{.*}}, <4 x float> [[LANE]], <4 x float> %a1) 124651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 125651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 126651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines 127651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesfloat64x2_t test_vfmsq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) { 128651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: test_vfmsq_lane_f64 129651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines return vfmsq_lane_f64(a1, a2, a3, 0); 130651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // NB: the test below is deliberately lose, so that we don't depend too much 131651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // upon the exact IR used to select lane 1 (usually a shufflevector) 132651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[NEG:%.*]] = fsub <1 x double> {{.*}}, %a3 133651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[NEG]] 134651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK: llvm.fma.v2f64(<2 x double> {{.*}}, <2 x double> [[LANE]], <2 x double> %a1) 135651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines // CHECK-NEXT: ret 136651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines} 137