1// REQUIRES: aarch64-registered-target 2// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s 3 4// Test new aarch64 intrinsics and types 5 6#include <arm_neon.h> 7 8float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { 9 // CHECK-LABEL: test_vmla_n_f32 10 return vmla_n_f32(a, b, c); 11 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 12 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 13 // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 14 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 15} 16 17float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { 18 // CHECK-LABEL: test_vmlaq_n_f32 19 return vmlaq_n_f32(a, b, c); 20 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 21 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 22 // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 23 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 24} 25 26float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 27 // CHECK-LABEL: test_vmlaq_n_f64 28 return vmlaq_n_f64(a, b, c); 29 // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 30 // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 31 // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 32 // CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 33} 34 35float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { 36 // CHECK-LABEL: test_vmlsq_n_f32 37 return vmlsq_n_f32(a, b, c); 38 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 39 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 40 // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 41 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 42} 43 44float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { 45 // CHECK-LABEL: test_vmls_n_f32 46 return vmls_n_f32(a, b, c); 47 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 48 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 49 // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 50 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 51} 52 53float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 54 // CHECK-LABEL: test_vmlsq_n_f64 55 return vmlsq_n_f64(a, b, c); 56 // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 57 // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 58 // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 59 // CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 60} 61 62float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 63 // CHECK-LABEL: test_vmla_lane_f32_0 64 return vmla_lane_f32(a, b, v, 0); 65 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 66 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 67 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 68} 69 70float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 71 // CHECK-LABEL: test_vmlaq_lane_f32_0 72 return vmlaq_lane_f32(a, b, v, 0); 73 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 74 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 75 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 76} 77 78float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 79 // CHECK-LABEL: test_vmla_laneq_f32_0 80 return vmla_laneq_f32(a, b, v, 0); 81 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 82 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 83 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 84} 85 86float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 87 // CHECK-LABEL: test_vmlaq_laneq_f32_0 88 return vmlaq_laneq_f32(a, b, v, 0); 89 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 90 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 91 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 92} 93 94float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 95 // CHECK-LABEL: test_vmls_lane_f32_0 96 return vmls_lane_f32(a, b, v, 0); 97 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 98 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 99 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 100} 101 102float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 103 // CHECK-LABEL: test_vmlsq_lane_f32_0 104 return vmlsq_lane_f32(a, b, v, 0); 105 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 106 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 107 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 108} 109 110float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 111 // CHECK-LABEL: test_vmls_laneq_f32_0 112 return vmls_laneq_f32(a, b, v, 0); 113 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 114 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 115 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 116} 117 118float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 119 // CHECK-LABEL: test_vmlsq_laneq_f32_0 120 return vmlsq_laneq_f32(a, b, v, 0); 121 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 122 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 123 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 124} 125 126float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 127 // CHECK-LABEL: test_vmla_lane_f32 128 return vmla_lane_f32(a, b, v, 1); 129 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 130 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 131 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 132} 133 134float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 135 // CHECK-LABEL: test_vmlaq_lane_f32 136 return vmlaq_lane_f32(a, b, v, 1); 137 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 138 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 139 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 140} 141 142float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 143 // CHECK-LABEL: test_vmla_laneq_f32 144 return vmla_laneq_f32(a, b, v, 3); 145 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 146 // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 147 // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 148} 149 150float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 151 // CHECK-LABEL: test_vmlaq_laneq_f32 152 return vmlaq_laneq_f32(a, b, v, 3); 153 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 154 // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 155 // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 156} 157 158float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 159 // CHECK-LABEL: test_vmls_lane_f32 160 return vmls_lane_f32(a, b, v, 1); 161 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 162 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 163 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 164} 165 166float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 167 // CHECK-LABEL: test_vmlsq_lane_f32 168 return vmlsq_lane_f32(a, b, v, 1); 169 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 170 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 171 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 172} 173float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 174 // CHECK-LABEL: test_vmls_laneq_f32 175 return vmls_laneq_f32(a, b, v, 3); 176 // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 177 // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 178 // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 179} 180 181float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 182 // CHECK-LABEL: test_vmlsq_laneq_f32 183 return vmlsq_laneq_f32(a, b, v, 3); 184 // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 185 // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 186 // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 187} 188 189float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 190 // CHECK-LABEL: test_vfmaq_n_f64: 191 return vfmaq_n_f64(a, b, c); 192 // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}} 193} 194 195float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 196 // CHECK-LABEL: test_vfmsq_n_f64: 197 return vfmsq_n_f64(a, b, c); 198 // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}} 199} 200