1// REQUIRES: aarch64-registered-target 2// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \ 3// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s 4 5// Test new aarch64 intrinsics and types 6 7#include <arm_neon.h> 8 9 10float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) { 11 // CHECK-LABEL: test_vmuls_lane_f32 12 return vmuls_lane_f32(a, b, 1); 13 // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 14} 15 16float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) { 17 // CHECK-LABEL: test_vmuld_lane_f64 18 return vmuld_lane_f64(a, b, 0); 19 // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}} 20} 21 22float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) { 23 // CHECK-LABEL: test_vmuls_laneq_f32 24 return vmuls_laneq_f32(a, b, 3); 25 // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 26} 27 28float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) { 29 // CHECK-LABEL: test_vmuld_laneq_f64 30 return vmuld_laneq_f64(a, b, 1); 31 // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 32} 33 34float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) { 35 // CHECK-LABEL: test_vmul_n_f64 36 return vmul_n_f64(a, b); 37 // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}} 38} 39 40float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) { 41// CHECK-LABEL: test_vmulxs_lane_f32 42 return vmulxs_lane_f32(a, b, 1); 43// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 44} 45 46float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) { 47// CHECK-LABEL: test_vmulxs_laneq_f32 48 return vmulxs_laneq_f32(a, b, 3); 49// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 50} 51 52float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) { 53// CHECK-LABEL: test_vmulxd_lane_f64 54 return vmulxd_lane_f64(a, b, 0); 55// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}} 56} 57 58float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) { 59// CHECK-LABEL: test_vmulxd_laneq_f64 60 return vmulxd_laneq_f64(a, b, 1); 61// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 62} 63 64// CHECK-LABEL: test_vmulx_lane_f64 65float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) { 66 return vmulx_lane_f64(a, b, 0); 67 // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}} 68} 69 70 71// CHECK-LABEL: test_vmulx_laneq_f64_0 72float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) { 73 return vmulx_laneq_f64(a, b, 0); 74 // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] 75} 76 77// CHECK-LABEL: test_vmulx_laneq_f64_1 78float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) { 79 return vmulx_laneq_f64(a, b, 1); 80 // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 81} 82 83 84// CHECK-LABEL: test_vfmas_lane_f32 85float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) { 86 return vfmas_lane_f32(a, b, c, 1); 87 // CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 88} 89 90// CHECK-LABEL: test_vfmad_lane_f64 91float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) { 92 return vfmad_lane_f64(a, b, c, 0); 93 // CHECK: {{fmla|fmadd}} {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}} 94} 95 96// CHECK-LABEL: test_vfmad_laneq_f64 97float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) { 98 return vfmad_laneq_f64(a, b, c, 1); 99 // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 100} 101 102// CHECK-LABEL: test_vfmss_lane_f32 103float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) { 104 return vfmss_lane_f32(a, b, c, 1); 105 // CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 106} 107 108// CHECK-LABEL: test_vfma_lane_f64 109float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { 110 return vfma_lane_f64(a, b, v, 0); 111 // CHECK: {{fmla|fmadd}} {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}} 112} 113 114// CHECK-LABEL: test_vfms_lane_f64 115float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { 116 return vfms_lane_f64(a, b, v, 0); 117 // CHECK: {{fmls|fmsub}} {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}} 118} 119 120// CHECK-LABEL: test_vfma_laneq_f64 121float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { 122 return vfma_laneq_f64(a, b, v, 0); 123 // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] 124} 125 126// CHECK-LABEL: test_vfms_laneq_f64 127float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { 128 return vfms_laneq_f64(a, b, v, 0); 129 // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] 130} 131 132// CHECK-LABEL: test_vqdmullh_lane_s16 133int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) { 134 return vqdmullh_lane_s16(a, b, 3); 135 // CHECK: sqdmull {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9].4h}}, {{v[0-9]+}}.h[3] 136} 137 138// CHECK-LABEL: test_vqdmulls_lane_s32 139int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) { 140 return vqdmulls_lane_s32(a, b, 1); 141 // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 142} 143 144// CHECK-LABEL: test_vqdmullh_laneq_s16 145int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) { 146 return vqdmullh_laneq_s16(a, b, 7); 147 // CHECK: sqdmull {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7] 148} 149 150// CHECK-LABEL: test_vqdmulls_laneq_s32 151int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) { 152 return vqdmulls_laneq_s32(a, b, 3); 153 // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 154} 155 156// CHECK-LABEL: test_vqdmulhh_lane_s16 157int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) { 158 return vqdmulhh_lane_s16(a, b, 3); 159// CHECK: sqdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3] 160} 161 162// CHECK-LABEL: test_vqdmulhs_lane_s32 163int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) { 164 return vqdmulhs_lane_s32(a, b, 1); 165// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 166} 167 168 169// CHECK-LABEL: test_vqdmulhh_laneq_s16 170int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) { 171 return vqdmulhh_laneq_s16(a, b, 7); 172// CHECK: sqdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7] 173} 174 175 176// CHECK-LABEL: test_vqdmulhs_laneq_s32 177int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) { 178 return vqdmulhs_laneq_s32(a, b, 3); 179// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 180} 181 182// CHECK-LABEL: test_vqrdmulhh_lane_s16 183int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) { 184 return vqrdmulhh_lane_s16(a, b, 3); 185// CHECK: sqrdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3] 186} 187 188// CHECK-LABEL: test_vqrdmulhs_lane_s32 189int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) { 190 return vqrdmulhs_lane_s32(a, b, 1); 191// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 192} 193 194 195// CHECK-LABEL: test_vqrdmulhh_laneq_s16 196int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) { 197 return vqrdmulhh_laneq_s16(a, b, 7); 198// CHECK: sqrdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7] 199} 200 201 202// CHECK-LABEL: test_vqrdmulhs_laneq_s32 203int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) { 204 return vqrdmulhs_laneq_s32(a, b, 3); 205// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 206} 207 208// CHECK-LABEL: test_vqdmlalh_lane_s16 209int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) { 210 return vqdmlalh_lane_s16(a, b, c, 3); 211// CHECK: sqdmlal {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3] 212} 213 214// CHECK-LABEL: test_vqdmlals_lane_s32 215int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) { 216 return vqdmlals_lane_s32(a, b, c, 1); 217// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 218} 219 220// CHECK-LABEL: test_vqdmlalh_laneq_s16 221int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { 222 return vqdmlalh_laneq_s16(a, b, c, 7); 223// CHECK: sqdmlal {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7] 224} 225 226// CHECK-LABEL: test_vqdmlals_laneq_s32 227int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) { 228 return vqdmlals_laneq_s32(a, b, c, 3); 229// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 230} 231 232// CHECK-LABEL: test_vqdmlslh_lane_s16 233int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) { 234 return vqdmlslh_lane_s16(a, b, c, 3); 235// CHECK: sqdmlsl {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3] 236} 237 238// CHECK-LABEL: test_vqdmlsls_lane_s32 239int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) { 240 return vqdmlsls_lane_s32(a, b, c, 1); 241// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 242} 243 244// CHECK-LABEL: test_vqdmlslh_laneq_s16 245int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { 246 return vqdmlslh_laneq_s16(a, b, c, 7); 247// CHECK: sqdmlsl {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7] 248} 249 250// CHECK-LABEL: test_vqdmlsls_laneq_s32 251int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) { 252 return vqdmlsls_laneq_s32(a, b, c, 3); 253// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 254} 255 256// CHECK-LABEL: test_vmulx_lane_f64_0: 257float64x1_t test_vmulx_lane_f64_0() { 258 float64x1_t arg1; 259 float64x1_t arg2; 260 float64x1_t result; 261 float64_t sarg1, sarg2, sres; 262 arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2)); 263 arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3)); 264 result = vmulx_lane_f64(arg1, arg2, 0); 265// CHECK: adrp x[[ADDRLO:[0-9]+]] 266// CHECK: ldr d0, [x[[ADDRLO]], 267// CHECK: adrp x[[ADDRLO:[0-9]+]] 268// CHECK: ldr d1, [x[[ADDRLO]], 269// CHECK: fmulx d0, d1, d0 270 return result; 271} 272 273// CHECK-LABEL: test_vmulx_laneq_f64_2: 274float64x1_t test_vmulx_laneq_f64_2() { 275 float64x1_t arg1; 276 float64x1_t arg2; 277 float64x2_t arg3; 278 float64x1_t result; 279 float64_t sarg1, sarg2, sres; 280 arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2)); 281 arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3)); 282 arg3 = vcombine_f64(arg1, arg2); 283 result = vmulx_laneq_f64(arg1, arg3, 1); 284// CHECK: adrp x[[ADDRLO:[0-9]+]] 285// CHECK: ldr d0, [x[[ADDRLO]], 286// CHECK: adrp x[[ADDRLO:[0-9]+]] 287// CHECK: ldr d1, [x[[ADDRLO]], 288// CHECK: fmulx d0, d1, d0 289 return result; 290} 291