// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// Test ARM64 SIMD fused multiply-add intrinsics

#include <arm_neon.h>

// Two-lane single-precision fused multiply-add.
// The patterns below expect one llvm.fma.v2f32 call whose operands appear in
// the order a2, a3, a1 (the accumulator a1 is the addend), immediately
// followed by the return.
float32x2_t test_vfma_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_f32
  return vfma_f32(a1, a2, a3);
  // CHECK: llvm.fma.v2f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

// Four-lane single-precision fused multiply-add.
// The patterns below expect one llvm.fma.v4f32 call whose operands appear in
// the order a2, a3, a1 (the accumulator a1 is the addend), immediately
// followed by the return.
float32x4_t test_vfmaq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmaq_f32
  return vfmaq_f32(a1, a2, a3);
  // CHECK: llvm.fma.v4f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

// Two-lane double-precision fused multiply-add.
// The patterns below expect one llvm.fma.v2f64 call whose operands appear in
// the order a2, a3, a1 (the accumulator a1 is the addend), immediately
// followed by the return.
float64x2_t test_vfmaq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmaq_f64
  return vfmaq_f64(a1, a2, a3);
  // CHECK: llvm.fma.v2f64({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

// Lane form of the two-lane single-precision fused multiply-add, called with
// lane index 1 of a3.
// The fma pattern pins %a2 as the first operand and %a1 as the addend; the
// middle operand (the lane-selected value) is left unconstrained.
float32x2_t test_vfma_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_lane_f32
  return vfma_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v2f32(<2 x float> %a2, <2 x float> {{.*}}, <2 x float> %a1)
  // CHECK-NEXT: ret
}

// Lane form of the four-lane single-precision fused multiply-add, called with
// lane index 1 of the two-lane vector a3.
// The fma pattern pins %a2 as the first operand and %a1 as the addend; the
// middle operand (the lane-selected value) is left unconstrained.
float32x4_t test_vfmaq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmaq_lane_f32
  return vfmaq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v4f32(<4 x float> %a2, <4 x float> {{.*}}, <4 x float> %a1)
  // CHECK-NEXT: ret
}

// Lane form of the two-lane double-precision fused multiply-add, called with
// lane index 0 of the one-lane vector a3.
// The fma pattern pins %a2 as the first operand and %a1 as the addend; the
// middle operand (the lane-selected value) is left unconstrained.
float64x2_t test_vfmaq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmaq_lane_f64
  return vfmaq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: llvm.fma.v2f64(<2 x double> %a2, <2 x double> {{.*}}, <2 x double> %a1)
  // CHECK-NEXT: ret
}

// Scalar-operand form of the two-lane single-precision fused multiply-add:
// a3 is a plain float32_t rather than a vector.
// Only the presence of an llvm.fma.v2f32 call directly before the return is
// verified; its operands are not constrained.
float32x2_t test_vfma_n_f32(float32x2_t a1, float32x2_t a2, float32_t a3) {
  // CHECK: test_vfma_n_f32
  return vfma_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f32
  // CHECK-NEXT: ret
}

// Scalar-operand form of the four-lane single-precision fused multiply-add:
// a3 is a plain float32_t rather than a vector.
// Only the presence of an llvm.fma.v4f32 call directly before the return is
// verified; its operands are not constrained.
float32x4_t test_vfmaq_n_f32(float32x4_t a1, float32x4_t a2, float32_t a3) {
  // CHECK: test_vfmaq_n_f32
  return vfmaq_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually four insertelements)
  // CHECK: llvm.fma.v4f32
  // CHECK-NEXT: ret
}

// Scalar-operand form of the two-lane double-precision fused multiply-add:
// a3 is a plain float64_t rather than a vector.
// Only the presence of an llvm.fma.v2f64 call directly before the return is
// verified; its operands are not constrained.
float64x2_t test_vfmaq_n_f64(float64x2_t a1, float64x2_t a2, float64_t a3) {
  // CHECK: test_vfmaq_n_f64
  return vfmaq_n_f64(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f64
  // CHECK-NEXT: ret
}

// Two-lane single-precision fused multiply-subtract.
// The patterns below expect an fsub whose second operand is %a2 (the
// negation), then an llvm.fma.v2f32 call taking %a3, the negated value and
// %a1 in that order, immediately followed by the return.
float32x2_t test_vfms_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_f32
  return vfms_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v2f32(<2 x float> %a3, <2 x float> [[NEG]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

// Four-lane single-precision fused multiply-subtract.
// The patterns below expect an fsub whose second operand is %a2 (the
// negation), then an llvm.fma.v4f32 call taking %a3, the negated value and
// %a1 in that order, immediately followed by the return.
float32x4_t test_vfmsq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmsq_f32
  return vfmsq_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <4 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v4f32(<4 x float> %a3, <4 x float> [[NEG]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

// Two-lane double-precision fused multiply-subtract.
// The patterns below expect an fsub whose second operand is %a2 (the
// negation), then an llvm.fma.v2f64 call taking %a3, the negated value and
// %a1 in that order, immediately followed by the return.
float64x2_t test_vfmsq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmsq_f64
  return vfmsq_f64(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x double> {{.*}}, %a2
  // CHECK: llvm.fma.v2f64(<2 x double> %a3, <2 x double> [[NEG]], <2 x double> %a1)
  // CHECK-NEXT: ret
}

// Lane form of the two-lane single-precision fused multiply-subtract, called
// with lane index 1 of a3.
// The patterns below expect %a3 to be negated with an fsub, the negated vector
// to feed a shufflevector, and that shuffle result to be the second operand of
// an llvm.fma.v2f32 call whose addend is %a1; the first fma operand is left
// unconstrained.
float32x2_t test_vfms_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_lane_f32
  return vfms_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v2f32(<2 x float> {{.*}}, <2 x float> [[LANE]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

// Lane form of the four-lane single-precision fused multiply-subtract, called
// with lane index 1 of the two-lane vector a3.
// The patterns below expect %a3 to be negated with a two-lane fsub, the
// negated vector to feed a shufflevector, and that shuffle result to be the
// second operand of an llvm.fma.v4f32 call whose addend is %a1; the first fma
// operand is left unconstrained.
float32x4_t test_vfmsq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmsq_lane_f32
  return vfmsq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v4f32(<4 x float> {{.*}}, <4 x float> [[LANE]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

// Lane form of the two-lane double-precision fused multiply-subtract, called
// with lane index 0 of the one-lane vector a3.
// The patterns below expect %a3 to be negated with a one-lane fsub, the
// negated vector to feed a shufflevector, and that shuffle result to be the
// second operand of an llvm.fma.v2f64 call whose addend is %a1; the first fma
// operand is left unconstrained.
float64x2_t test_vfmsq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmsq_lane_f64
  return vfmsq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <1 x double> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[NEG]]
  // CHECK: llvm.fma.v2f64(<2 x double> {{.*}}, <2 x double> [[LANE]], <2 x double> %a1)
  // CHECK-NEXT: ret
}