1//===- subzero/unittest/AssemblerX8664/XmmArith.cpp -----------------------===// 2// 3// The Subzero Code Generator 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9#include "AssemblerX8664/TestUtil.h" 10 11namespace Ice { 12namespace X8664 { 13namespace Test { 14namespace { 15 16TEST_F(AssemblerX8664Test, ArithSS) { 17#define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op) \ 18 do { \ 19 static_assert(FloatSize == 32 || FloatSize == 64, \ 20 "Invalid fp size " #FloatSize); \ 21 static constexpr char TestString[] = \ 22 "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1 \ 23 ", " #Inst ", " #Op ")"; \ 24 static constexpr bool IsDouble = FloatSize == 64; \ 25 using Type = std::conditional<IsDouble, double, float>::type; \ 26 const uint32_t T0 = allocateQword(); \ 27 const Type V0 = Value0; \ 28 const uint32_t T1 = allocateQword(); \ 29 const Type V1 = Value1; \ 30 \ 31 __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 32 __ movss(IceType_f##FloatSize, Encoded_Xmm_##Src(), dwordAddress(T1)); \ 33 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 34 \ 35 AssembledTest test = assemble(); \ 36 if (IsDouble) { \ 37 test.setQwordTo(T0, static_cast<double>(V0)); \ 38 test.setQwordTo(T1, static_cast<double>(V1)); \ 39 } else { \ 40 test.setDwordTo(T0, static_cast<float>(V0)); \ 41 test.setDwordTo(T1, static_cast<float>(V1)); \ 42 } \ 43 \ 44 test.run(); \ 45 \ 46 ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \ 47 reset(); \ 48 } while (0) 49 50#define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op) \ 51 do { \ 52 static_assert(FloatSize == 32 || FloatSize == 64, \ 53 "Invalid fp size " #FloatSize); \ 54 static constexpr char TestString[] = \ 55 "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst \ 56 ", " #Op ")"; \ 57 static constexpr bool IsDouble = FloatSize == 64; \ 58 using Type = std::conditional<IsDouble, double, float>::type; \ 59 const uint32_t T0 = allocateQword(); \ 60 const Type V0 = Value0; \ 61 const uint32_t T1 = allocateQword(); \ 62 const Type V1 = Value1; \ 63 \ 64 __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 65 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 66 \ 67 AssembledTest test = assemble(); \ 68 if (IsDouble) { \ 69 test.setQwordTo(T0, static_cast<double>(V0)); \ 70 test.setQwordTo(T1, static_cast<double>(V1)); \ 71 } else { \ 72 test.setDwordTo(T0, static_cast<float>(V0)); \ 73 test.setDwordTo(T1, static_cast<float>(V1)); \ 74 } \ 75 \ 76 test.run(); \ 77 \ 78 ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \ 79 reset(); \ 80 } while (0) 81 82#define TestArithSS(FloatSize, Src, Dst0, Dst1) \ 83 do { \ 84 TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +); \ 85 TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +); \ 86 TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -); \ 87 TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -); \ 88 TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *); \ 89 TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *); \ 90 TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / ); \ 91 TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / ); \ 92 } while (0) 93 94#define TestImpl(Src, Dst0, Dst1) \ 95 do { \ 96 TestArithSS(32, Src, Dst0, Dst1); \ 97 TestArithSS(64, Src, Dst0, Dst1); \ 98 } while (0) 99 100 TestImpl(xmm0, xmm1, xmm2); 101 TestImpl(xmm1, xmm2, xmm3); 102 TestImpl(xmm2, xmm3, xmm4); 103 TestImpl(xmm3, xmm4, xmm5); 104 TestImpl(xmm4, xmm5, xmm6); 105 TestImpl(xmm5, xmm6, xmm7); 106 TestImpl(xmm6, xmm7, xmm8); 107 TestImpl(xmm7, xmm8, xmm9); 108 TestImpl(xmm8, xmm9, xmm10); 109 TestImpl(xmm9, xmm10, xmm11); 110 TestImpl(xmm10, xmm11, xmm12); 111 TestImpl(xmm11, xmm12, xmm13); 112 TestImpl(xmm12, xmm13, xmm14); 113 TestImpl(xmm13, xmm14, xmm15); 114 TestImpl(xmm14, xmm15, xmm0); 115 TestImpl(xmm15, xmm0, xmm1); 116 117#undef TestImpl 118#undef TestArithSS 119#undef TestArithSSXmmAddr 120#undef TestArithSSXmmXmm 121} 122 123TEST_F(AssemblerX8664Test, PArith) { 124#define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size) \ 125 do { \ 126 static constexpr char TestString[] = \ 127 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ 128 ", " #Type ", " #Size ")"; \ 129 const uint32_t T0 = allocateDqword(); \ 130 const Dqword V0 Value0; \ 131 \ 132 const uint32_t T1 = allocateDqword(); \ 133 const Dqword V1 Value1; \ 134 \ 135 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 136 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 137 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 138 \ 139 AssembledTest test = assemble(); \ 140 test.setDqwordTo(T0, V0); \ 141 test.setDqwordTo(T1, V1); \ 142 test.run(); \ 143 \ 144 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \ 145 << TestString; \ 146 reset(); \ 147 } while (0) 148 149#define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size) \ 150 do { \ 151 static constexpr char TestString[] = \ 152 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ 153 ", " #Type ", " #Size ")"; \ 154 const uint32_t T0 = allocateDqword(); \ 155 const Dqword V0 Value0; \ 156 \ 157 const uint32_t T1 = allocateDqword(); \ 158 const Dqword V1 Value1; \ 159 \ 160 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 161 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 162 \ 163 AssembledTest test = assemble(); \ 164 test.setDqwordTo(T0, V0); \ 165 test.setDqwordTo(T1, V1); \ 166 test.run(); \ 167 \ 168 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \ 169 << TestString; \ 170 reset(); \ 171 } while (0) 172 173#define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size) \ 174 do { \ 175 static constexpr char TestString[] = \ 176 "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type \ 177 ", " #Size ")"; \ 178 const uint32_t T0 = allocateDqword(); \ 179 const Dqword V0 Value0; \ 180 \ 181 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 182 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Immediate(Imm)); \ 183 \ 184 AssembledTest test = assemble(); \ 185 test.setDqwordTo(T0, V0); \ 186 test.run(); \ 187 \ 188 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>()) \ 189 << TestString; \ 190 reset(); \ 191 } while (0) 192 193#define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size) \ 194 do { \ 195 static constexpr char TestString[] = \ 196 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type \ 197 ", " #Size ")"; \ 198 const uint32_t T0 = allocateDqword(); \ 199 const Dqword V0 Value0; \ 200 \ 201 const uint32_t T1 = allocateDqword(); \ 202 const Dqword V1 Value1; \ 203 \ 204 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 205 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 206 __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 207 \ 208 AssembledTest test = assemble(); \ 209 test.setDqwordTo(T0, V0); \ 210 test.setDqwordTo(T1, V1); \ 211 test.run(); \ 212 \ 213 ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \ 214 << TestString; \ 215 reset(); \ 216 } while (0) 217 218#define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size) \ 219 do { \ 220 static constexpr char TestString[] = \ 221 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size \ 222 ")"; \ 223 const uint32_t T0 = allocateDqword(); \ 224 const Dqword V0 Value0; \ 225 \ 226 const uint32_t T1 = allocateDqword(); \ 227 const Dqword V1 Value1; \ 228 \ 229 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 230 __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 231 \ 232 AssembledTest test = assemble(); \ 233 test.setDqwordTo(T0, V0); \ 234 test.setDqwordTo(T1, V1); \ 235 test.run(); \ 236 \ 237 ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \ 238 << TestString; \ 239 reset(); \ 240 } while (0) 241 242#define TestPArithSize(Dst, Src, Size) \ 243 do { \ 244 static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size."); \ 245 if (Size != 8) { \ 246 TestPArithXmmXmm( \ 247 Dst, \ 248 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 249 Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \ 250 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ 251 uint64_t(0x8080404002020101ull)), \ 252 (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \ 253 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ 254 uint64_t(0x8080404002020101ull)), \ 255 3u, psra, >>, int, Size); \ 256 TestPArithXmmXmm( \ 257 Dst, \ 258 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 259 Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \ 260 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ 261 uint64_t(0x8080404002020101ull)), \ 262 (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \ 263 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ 264 uint64_t(0x8080404002020101ull)), \ 265 3u, psrl, >>, uint, Size); \ 266 TestPArithXmmXmm( \ 267 Dst, \ 268 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 269 Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \ 270 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ 271 uint64_t(0x8080404002020101ull)), \ 272 (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \ 273 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ 274 uint64_t(0x8080404002020101ull)), \ 275 3u, psll, <<, uint, Size); \ 276 \ 277 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 278 uint64_t(0x8080404002020101ull)), \ 279 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 280 uint64_t(0x0123456789ABCDEull)), \ 281 pmull, *, int, Size); \ 282 TestPArithXmmAddr( \ 283 Dst, \ 284 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 285 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 286 pmull, *, int, Size); \ 287 if (Size != 16) { \ 288 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 289 uint64_t(0x8080404002020101ull)), \ 290 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 291 uint64_t(0x0123456789ABCDEull)), \ 292 pmuludq, *, uint, Size); \ 293 TestPArithXmmAddr( \ 294 Dst, (uint64_t(0x8040201008040201ull), \ 295 uint64_t(0x8080404002020101ull)), \ 296 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 297 pmuludq, *, uint, Size); \ 298 } \ 299 } \ 300 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 301 uint64_t(0x8080404002020101ull)), \ 302 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 303 uint64_t(0x0123456789ABCDEull)), \ 304 padd, +, int, Size); \ 305 TestPArithXmmAddr( \ 306 Dst, \ 307 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 308 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 309 padd, +, int, Size); \ 310 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 311 uint64_t(0x8080404002020101ull)), \ 312 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 313 uint64_t(0x0123456789ABCDEull)), \ 314 psub, -, int, Size); \ 315 TestPArithXmmAddr( \ 316 Dst, \ 317 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 318 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 319 psub, -, int, Size); \ 320 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 321 uint64_t(0x8080404002020101ull)), \ 322 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 323 uint64_t(0x0123456789ABCDEull)), \ 324 pand, &, int, Size); \ 325 TestPArithXmmAddr( \ 326 Dst, \ 327 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 328 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 329 pand, &, int, Size); \ 330 \ 331 TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 332 uint64_t(0x8080404002020101ull)), \ 333 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 334 uint64_t(0x0123456789ABCDEull)), \ 335 int, Size); \ 336 TestPAndnXmmAddr( \ 337 Dst, \ 338 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 339 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 340 int, Size); \ 341 \ 342 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 343 uint64_t(0x8080404002020101ull)), \ 344 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 345 uint64_t(0x0123456789ABCDEull)), \ 346 por, |, int, Size); \ 347 TestPArithXmmAddr( \ 348 Dst, \ 349 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 350 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 351 por, |, int, Size); \ 352 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 353 uint64_t(0x8080404002020101ull)), \ 354 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 355 uint64_t(0x0123456789ABCDEull)), \ 356 pxor, ^, int, Size); \ 357 TestPArithXmmAddr( \ 358 Dst, \ 359 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 360 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 361 pxor, ^, int, Size); \ 362 } while (0) 363 364#define TestPArith(Src, Dst) \ 365 do { \ 366 TestPArithSize(Src, Dst, 8); \ 367 TestPArithSize(Src, Dst, 16); \ 368 TestPArithSize(Src, Dst, 32); \ 369 } while (0) 370 371 TestPArith(xmm0, xmm1); 372 TestPArith(xmm1, xmm2); 373 TestPArith(xmm2, xmm3); 374 TestPArith(xmm3, xmm4); 375 TestPArith(xmm4, xmm5); 376 TestPArith(xmm5, xmm6); 377 TestPArith(xmm6, xmm7); 378 TestPArith(xmm7, xmm8); 379 TestPArith(xmm8, xmm9); 380 TestPArith(xmm9, xmm10); 381 TestPArith(xmm10, xmm11); 382 TestPArith(xmm11, xmm12); 383 TestPArith(xmm12, xmm13); 384 TestPArith(xmm13, xmm14); 385 TestPArith(xmm14, xmm15); 386 TestPArith(xmm15, xmm0); 387 388#undef TestPArith 389#undef TestPArithSize 390#undef TestPAndnXmmAddr 391#undef TestPAndnXmmXmm 392#undef TestPArithXmmImm 393#undef TestPArithXmmAddr 394#undef TestPArithXmmXmm 395} 396 397TEST_F(AssemblerX8664Test, ArithPS) { 398#define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \ 399 do { \ 400 static constexpr char TestString[] = \ 401 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ 402 ", " #Type ")"; \ 403 const uint32_t T0 = allocateDqword(); \ 404 const Dqword V0 Value0; \ 405 const uint32_t T1 = allocateDqword(); \ 406 const Dqword V1 Value1; \ 407 \ 408 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 409 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 410 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 411 \ 412 AssembledTest test = assemble(); \ 413 test.setDqwordTo(T0, V0); \ 414 test.setDqwordTo(T1, V1); \ 415 test.run(); \ 416 \ 417 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 418 \ 419 reset(); \ 420 } while (0) 421 422#define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type) \ 423 do { \ 424 static constexpr char TestString[] = \ 425 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ 426 ", " #Type ")"; \ 427 const uint32_t T0 = allocateDqword(); \ 428 const Dqword V0 Value0; \ 429 const uint32_t T1 = allocateDqword(); \ 430 const Dqword V1 Value1; \ 431 \ 432 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 433 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 434 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 435 \ 436 AssembledTest test = assemble(); \ 437 test.setDqwordTo(T0, V0); \ 438 test.setDqwordTo(T1, V1); \ 439 test.run(); \ 440 \ 441 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 442 \ 443 reset(); \ 444 } while (0) 445 446#define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type) \ 447 do { \ 448 static constexpr char TestString[] = \ 449 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ 450 ", " #Type ")"; \ 451 const uint32_t T0 = allocateDqword(); \ 452 const Dqword V0 Value0; \ 453 const uint32_t T1 = allocateDqword(); \ 454 const Dqword V1 Value1; \ 455 \ 456 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 457 __ Inst(Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 458 \ 459 AssembledTest test = assemble(); \ 460 test.setDqwordTo(T0, V0); \ 461 test.setDqwordTo(T1, V1); \ 462 test.run(); \ 463 \ 464 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 465 \ 466 reset(); \ 467 } while (0) 468 469#define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type) \ 470 do { \ 471 static constexpr char TestString[] = \ 472 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type \ 473 ")"; \ 474 const uint32_t T0 = allocateDqword(); \ 475 const Dqword V0 Value0; \ 476 const uint32_t T1 = allocateDqword(); \ 477 const Dqword V1 Value1; \ 478 \ 479 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 480 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 481 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 482 \ 483 AssembledTest test = assemble(); \ 484 test.setDqwordTo(T0, V0); \ 485 test.setDqwordTo(T1, V1); \ 486 test.run(); \ 487 \ 488 ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString; \ 489 \ 490 reset(); \ 491 } while (0) 492 493#define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type) \ 494 do { \ 495 static constexpr char TestString[] = \ 496 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ 497 ", " #Type ")"; \ 498 const uint32_t T0 = allocateDqword(); \ 499 const Dqword V0 Value0; \ 500 const uint32_t T1 = allocateDqword(); \ 501 const Dqword V1 Value1; \ 502 \ 503 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 504 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 505 \ 506 AssembledTest test = assemble(); \ 507 test.setDqwordTo(T0, V0); \ 508 test.setDqwordTo(T1, V1); \ 509 test.run(); \ 510 \ 511 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 512 \ 513 reset(); \ 514 } while (0) 515 516#define TestArithPS(Dst, Src) \ 517 do { \ 518 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 519 (0.55, 0.43, 0.23, 1.21), addps, +, float); \ 520 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 521 (0.55, 0.43, 0.23, 1.21), addps, +, float); \ 522 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 523 (0.55, 0.43, 0.23, 1.21), subps, -, float); \ 524 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 525 (0.55, 0.43, 0.23, 1.21), subps, -, float); \ 526 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 527 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \ 528 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 529 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \ 530 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 531 (0.55, 0.43, 0.23, 1.21), divps, /, float); \ 532 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 533 (0.55, 0.43, 0.23, 1.21), divps, /, float); \ 534 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 535 (0.55, 0.43, 0.23, 1.21), andps, &, float); \ 536 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 537 (0.55, 0.43, 0.23, 1.21), andps, &, float); \ 538 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &, \ 539 double); \ 540 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &, \ 541 double); \ 542 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 543 (0.55, 0.43, 0.23, 1.21), orps, |, float); \ 544 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |, \ 545 double); \ 546 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 547 (0.55, 0.43, 0.23, 1.21), minps, float); \ 548 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 549 (0.55, 0.43, 0.23, 1.21), maxps, float); \ 550 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double); \ 551 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double); \ 552 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 553 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \ 554 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 555 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \ 556 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^, \ 557 double); \ 558 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^, \ 559 double); \ 560 } while (0) 561 562 TestArithPS(xmm0, xmm1); 563 TestArithPS(xmm1, xmm2); 564 TestArithPS(xmm2, xmm3); 565 TestArithPS(xmm3, xmm4); 566 TestArithPS(xmm4, xmm5); 567 TestArithPS(xmm5, xmm6); 568 TestArithPS(xmm6, xmm7); 569 TestArithPS(xmm7, xmm8); 570 TestArithPS(xmm8, xmm9); 571 TestArithPS(xmm9, xmm10); 572 TestArithPS(xmm10, xmm11); 573 TestArithPS(xmm11, xmm12); 574 TestArithPS(xmm12, xmm13); 575 TestArithPS(xmm13, xmm14); 576 TestArithPS(xmm14, xmm15); 577 TestArithPS(xmm15, xmm0); 578 579#undef TestArithPs 580#undef TestMinMaxPS 581#undef TestArithPSXmmXmmUntyped 582#undef TestArithPSXmmAddr 583#undef TestArithPSXmmXmm 584} 585 586TEST_F(AssemblerX8664Test, Blending) { 587 using f32 = float; 588 using i8 = uint8_t; 589 590#define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type) \ 591 do { \ 592 static constexpr char TestString[] = \ 593 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst \ 594 ", " #Type ")"; \ 595 const uint32_t T0 = allocateDqword(); \ 596 const Dqword V0 Value0; \ 597 const uint32_t T1 = allocateDqword(); \ 598 const Dqword V1 Value1; \ 599 const uint32_t Mask = allocateDqword(); \ 600 const Dqword MaskValue M; \ 601 \ 602 __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \ 603 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 604 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 605 __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 606 \ 607 AssembledTest test = assemble(); \ 608 test.setDqwordTo(T0, V0); \ 609 test.setDqwordTo(T1, V1); \ 610 test.setDqwordTo(Mask, MaskValue); \ 611 test.run(); \ 612 \ 613 ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \ 614 << TestString; \ 615 reset(); \ 616 } while (0) 617 618#define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type) \ 619 do { \ 620 static constexpr char TestString[] = \ 621 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \ 622 ")"; \ 623 const uint32_t T0 = allocateDqword(); \ 624 const Dqword V0 Value0; \ 625 const uint32_t T1 = allocateDqword(); \ 626 const Dqword V1 Value1; \ 627 const uint32_t Mask = allocateDqword(); \ 628 const Dqword MaskValue M; \ 629 \ 630 __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \ 631 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 632 __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 633 \ 634 AssembledTest test = assemble(); \ 635 test.setDqwordTo(T0, V0); \ 636 test.setDqwordTo(T1, V1); \ 637 test.setDqwordTo(Mask, MaskValue); \ 638 test.run(); \ 639 \ 640 ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \ 641 << TestString; \ 642 reset(); \ 643 } while (0) 644 645#define TestBlending(Src, Dst) \ 646 do { \ 647 TestBlendingXmmXmm( \ 648 Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0), \ 649 (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \ 650 blendvps, f32); \ 651 TestBlendingXmmAddr( \ 652 Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0), \ 653 (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \ 654 blendvps, f32); \ 655 TestBlendingXmmXmm( \ 656 Dst, \ 657 (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \ 658 Src, \ 659 (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \ 660 (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \ 661 pblendvb, i8); \ 662 TestBlendingXmmAddr( \ 663 Dst, \ 664 (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \ 665 (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \ 666 (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \ 667 pblendvb, i8); \ 668 } while (0) 669 670 /* xmm0 is taken. It is the implicit mask . */ 671 TestBlending(xmm1, xmm2); 672 TestBlending(xmm2, xmm3); 673 TestBlending(xmm3, xmm4); 674 TestBlending(xmm4, xmm5); 675 TestBlending(xmm5, xmm6); 676 TestBlending(xmm6, xmm7); 677 TestBlending(xmm7, xmm8); 678 TestBlending(xmm8, xmm9); 679 TestBlending(xmm9, xmm10); 680 TestBlending(xmm10, xmm11); 681 TestBlending(xmm11, xmm12); 682 TestBlending(xmm12, xmm13); 683 TestBlending(xmm13, xmm14); 684 TestBlending(xmm14, xmm15); 685 TestBlending(xmm15, xmm1); 686 687#undef TestBlending 688#undef TestBlendingXmmAddr 689#undef TestBlendingXmmXmm 690} 691 692TEST_F(AssemblerX8664Test, Cmpps) { 693#define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type) \ 694 do { \ 695 static constexpr char TestString[] = \ 696 "(" #Src ", " #Dst ", " #C ", " #Op ")"; \ 697 const uint32_t T0 = allocateDqword(); \ 698 const Dqword V0 Value0; \ 699 const uint32_t T1 = allocateDqword(); \ 700 const Dqword V1 Value1; \ 701 \ 702 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 703 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 704 __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ 705 Cond::Cmpps_##C); \ 706 \ 707 AssembledTest test = assemble(); \ 708 test.setDqwordTo(T0, V0); \ 709 test.setDqwordTo(T1, V1); \ 710 test.run(); \ 711 \ 712 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 713 ; \ 714 reset(); \ 715 } while (0) 716 717#define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type) \ 718 do { \ 719 static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")"; \ 720 const uint32_t T0 = allocateDqword(); \ 721 const Dqword V0 Value0; \ 722 const uint32_t T1 = allocateDqword(); \ 723 const Dqword V1 Value1; \ 724 \ 725 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 726 __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1), \ 727 Cond::Cmpps_##C); \ 728 \ 729 AssembledTest test = assemble(); \ 730 test.setDqwordTo(T0, V0); \ 731 test.setDqwordTo(T1, V1); \ 732 test.run(); \ 733 \ 734 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 735 ; \ 736 reset(); \ 737 } while (0) 738 739#define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type) \ 740 do { \ 741 static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")"; \ 742 const uint32_t T0 = allocateDqword(); \ 743 const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 744 std::numeric_limits<float>::quiet_NaN()); \ 745 const uint32_t T1 = allocateDqword(); \ 746 const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 747 std::numeric_limits<float>::quiet_NaN()); \ 748 \ 749 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 750 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 751 __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ 752 Cond::Cmpps_##C); \ 753 \ 754 AssembledTest test = assemble(); \ 755 test.setDqwordTo(T0, V0); \ 756 test.setDqwordTo(T1, V1); \ 757 test.run(); \ 758 \ 759 ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \ 760 ; \ 761 reset(); \ 762 } while (0) 763 764#define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type) \ 765 do { \ 766 static constexpr char TestString[] = "(" #Dst ", " #C ")"; \ 767 const uint32_t T0 = allocateDqword(); \ 768 const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 769 std::numeric_limits<float>::quiet_NaN()); \ 770 const uint32_t T1 = allocateDqword(); \ 771 const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 772 std::numeric_limits<float>::quiet_NaN()); \ 773 \ 774 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 775 __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1), \ 776 Cond::Cmpps_##C); \ 777 \ 778 AssembledTest test = assemble(); \ 779 test.setDqwordTo(T0, V0); \ 780 test.setDqwordTo(T1, V1); \ 781 test.run(); \ 782 \ 783 ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \ 784 ; \ 785 reset(); \ 786 } while (0) 787 788#define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type) \ 789 do { \ 790 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 791 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 792 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 793 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 794 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 795 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 796 TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \ 797 TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type); \ 798 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 799 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 800 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 801 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 802 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 803 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 804 if (FloatSize == 32) { \ 805 TestCmppsOrdUnordXmmXmm( \ 806 32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 807 std::numeric_limits<float>::quiet_NaN()), \ 808 Src, (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 809 std::numeric_limits<float>::quiet_NaN()), \ 810 unord, Type); \ 811 TestCmppsOrdUnordXmmAddr( \ 812 32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 813 std::numeric_limits<float>::quiet_NaN()), \ 814 (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 815 std::numeric_limits<float>::quiet_NaN()), \ 816 unord, Type); \ 817 } else { \ 818 TestCmppsOrdUnordXmmXmm(64, Dst, \ 819 (1.0, std::numeric_limits<double>::quiet_NaN()), \ 820 Src, (std::numeric_limits<double>::quiet_NaN(), \ 821 std::numeric_limits<double>::quiet_NaN()), \ 822 unord, Type); \ 823 TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src, \ 824 (1.0, std::numeric_limits<double>::quiet_NaN()), \ 825 unord, Type); \ 826 TestCmppsOrdUnordXmmAddr( \ 827 64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), \ 828 (std::numeric_limits<double>::quiet_NaN(), \ 829 std::numeric_limits<double>::quiet_NaN()), \ 830 unord, Type); \ 831 TestCmppsOrdUnordXmmAddr( \ 832 64, Dst, (1.0, 1.0), \ 833 (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type); \ 834 } \ 835 } while (0) 836 837#define TestCmppsSize(FloatSize, Value0, Value1, Type) \ 838 do { \ 839 TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type); \ 840 TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type); \ 841 TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type); \ 842 TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type); \ 843 TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type); \ 844 TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type); \ 845 TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type); \ 846 TestCmpps(FloatSize, xmm7, Value0, xmm8, Value1, Type); \ 847 TestCmpps(FloatSize, xmm8, Value0, xmm9, Value1, Type); \ 848 TestCmpps(FloatSize, xmm9, Value0, xmm10, Value1, Type); \ 849 TestCmpps(FloatSize, xmm10, Value0, xmm11, Value1, Type); \ 850 TestCmpps(FloatSize, xmm11, Value0, xmm12, Value1, Type); \ 851 TestCmpps(FloatSize, xmm12, Value0, xmm13, Value1, Type); \ 852 TestCmpps(FloatSize, xmm13, Value0, xmm14, Value1, Type); \ 853 TestCmpps(FloatSize, xmm14, Value0, xmm15, Value1, Type); \ 854 TestCmpps(FloatSize, xmm15, Value0, xmm0, Value1, Type); \ 855 } while (0) 856 857 TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5), 858 float); 859 TestCmppsSize(64, (1.0, -1000.0), (1.0, -1000.0), double); 860 861#undef TestCmpps 862#undef TestCmppsOrdUnordXmmAddr 863#undef TestCmppsOrdUnordXmmXmm 864#undef TestCmppsXmmAddr 865#undef TestCmppsXmmXmm 866} 867 868TEST_F(AssemblerX8664Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) { 869#define TestImplSingle(Dst, Inst, Expect) \ 870 do { \ 871 static constexpr char TestString[] = "(" #Dst ", " #Inst ")"; \ 872 const uint32_t T0 = allocateDqword(); \ 873 const Dqword V0(1.0, 4.0, 20.0, 3.14); \ 874 \ 875 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 876 __ Inst(Encoded_Xmm_##Dst()); \ 877 \ 878 AssembledTest test = assemble(); \ 879 test.setDqwordTo(T0, V0); \ 880 test.run(); \ 881 ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString; \ 882 reset(); \ 883 } while (0) 884 885#define TestImpl(Dst) \ 886 do { \ 887 TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull), \ 888 uint64_t(0x3FE2D10B408F1BBDull))); \ 889 TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull), \ 890 uint64_t(0x3F1078003E64F000ull))); \ 891 TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull), \ 892 uint64_t(0x3EA310003D4CC000ull))); \ 893 \ 894 TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull), \ 895 uint64_t(0x401C42FAE40282A8ull))); \ 896 } while (0) 897 898 TestImpl(xmm0); 899 TestImpl(xmm1); 900 TestImpl(xmm2); 901 TestImpl(xmm3); 902 TestImpl(xmm4); 903 TestImpl(xmm5); 904 TestImpl(xmm6); 905 TestImpl(xmm7); 906 TestImpl(xmm8); 907 TestImpl(xmm9); 908 TestImpl(xmm10); 909 TestImpl(xmm11); 910 TestImpl(xmm12); 911 TestImpl(xmm13); 912 TestImpl(xmm14); 913 TestImpl(xmm15); 914 915#undef TestImpl 916#undef TestImplSingle 917} 918 919TEST_F(AssemblerX8664Test, Unpck) { 920 const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull), 921 uint64_t(0xCCCCCCCCDDDDDDDDull)); 922 const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull), 923 uint64_t(0x9999999988888888ull)); 924 925 const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull), 926 uint64_t(0xEEEEEEEEAAAAAAAAull)); 927 const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull), 928 uint64_t(0xEEEEEEEEFFFFFFFFull)); 929 const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull), 930 uint64_t(0x99999999CCCCCCCCull)); 931 const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull), 932 uint64_t(0x9999999988888888ull)); 933 934#define TestImplSingle(Dst, Src, Inst) \ 935 do { \ 936 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ 937 const uint32_t T0 = allocateDqword(); \ 938 const uint32_t T1 = allocateDqword(); \ 939 \ 940 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 941 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 942 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 943 \ 944 AssembledTest test = assemble(); \ 945 test.setDqwordTo(T0, V0); \ 946 test.setDqwordTo(T1, V1); \ 947 test.run(); \ 948 \ 949 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ 950 reset(); \ 951 } while (0) 952 953#define TestImpl(Dst, Src) \ 954 do { \ 955 TestImplSingle(Dst, Src, unpcklps); \ 956 TestImplSingle(Dst, Src, unpcklpd); \ 957 TestImplSingle(Dst, Src, unpckhps); \ 958 TestImplSingle(Dst, Src, unpckhpd); \ 959 } while (0) 960 961 TestImpl(xmm0, xmm1); 962 TestImpl(xmm1, xmm2); 963 TestImpl(xmm2, xmm3); 964 TestImpl(xmm3, xmm4); 965 TestImpl(xmm4, xmm5); 966 TestImpl(xmm5, xmm6); 967 TestImpl(xmm6, xmm7); 968 TestImpl(xmm7, xmm8); 969 TestImpl(xmm8, xmm9); 970 TestImpl(xmm9, xmm10); 971 TestImpl(xmm10, xmm11); 972 TestImpl(xmm11, xmm12); 973 TestImpl(xmm12, xmm13); 974 TestImpl(xmm13, xmm14); 975 TestImpl(xmm14, xmm15); 976 TestImpl(xmm15, xmm0); 977 978#undef TestImpl 979#undef TestImplSingle 980} 981 982TEST_F(AssemblerX8664Test, Shufp) { 983 const Dqword V0(uint64_t(0x1111111122222222ull), 984 uint64_t(0x5555555577777777ull)); 985 const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull), 986 uint64_t(0xCCCCCCCCDDDDDDDDull)); 987 988 const uint8_t pshufdImm = 0x63; 989 const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull), 990 uint64_t(0xAAAAAAAADDDDDDDDull)); 991 992 const uint8_t shufpsImm = 0xf9; 993 const Dqword shufpsExpected(uint64_t(0x7777777711111111ull), 994 uint64_t(0xCCCCCCCCCCCCCCCCull)); 995 996#define TestImplSingleXmmXmm(Dst, Src, Inst) \ 997 do { \ 998 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ 999 const uint32_t T0 = allocateDqword(); \ 1000 const uint32_t T1 = allocateDqword(); \ 1001 \ 1002 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1003 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 1004 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ 1005 Immediate(Inst##Imm)); \ 1006 \ 1007 AssembledTest test = assemble(); \ 1008 test.setDqwordTo(T0, V0); \ 1009 test.setDqwordTo(T1, V1); \ 1010 test.run(); \ 1011 \ 1012 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ 1013 reset(); \ 1014 } while (0) 1015 1016#define TestImplSingleXmmAddr(Dst, Inst) \ 1017 do { \ 1018 static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \ 1019 const uint32_t T0 = allocateDqword(); \ 1020 const uint32_t T1 = allocateDqword(); \ 1021 \ 1022 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1023 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \ 1024 Immediate(Inst##Imm)); \ 1025 \ 1026 AssembledTest test = assemble(); \ 1027 test.setDqwordTo(T0, V0); \ 1028 test.setDqwordTo(T1, V1); \ 1029 test.run(); \ 1030 \ 1031 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ 1032 reset(); \ 1033 } while (0) 1034 1035#define TestImplSingleXmmXmmUntyped(Dst, Src, Inst) \ 1036 do { \ 1037 static constexpr char TestString[] = \ 1038 "(" #Dst ", " #Src ", " #Inst ", Untyped)"; \ 1039 const uint32_t T0 = allocateDqword(); \ 1040 const uint32_t T1 = allocateDqword(); \ 1041 \ 1042 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1043 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 1044 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Immediate(Inst##Imm)); \ 1045 \ 1046 AssembledTest test = assemble(); \ 1047 test.setDqwordTo(T0, V0); \ 1048 test.setDqwordTo(T1, V1); \ 1049 test.run(); \ 1050 \ 1051 ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString; \ 1052 reset(); \ 1053 } while (0) 1054 1055#define TestImpl(Dst, Src) \ 1056 do { \ 1057 TestImplSingleXmmXmm(Dst, Src, pshufd); \ 1058 TestImplSingleXmmAddr(Dst, pshufd); \ 1059 TestImplSingleXmmXmm(Dst, Src, shufps); \ 1060 TestImplSingleXmmAddr(Dst, shufps); \ 1061 } while (0) 1062 1063 TestImpl(xmm0, xmm1); 1064 TestImpl(xmm1, xmm2); 1065 TestImpl(xmm2, xmm3); 1066 TestImpl(xmm3, xmm4); 1067 TestImpl(xmm4, xmm5); 1068 TestImpl(xmm5, xmm6); 1069 TestImpl(xmm6, xmm7); 1070 TestImpl(xmm7, xmm8); 1071 TestImpl(xmm8, xmm9); 1072 TestImpl(xmm9, xmm10); 1073 TestImpl(xmm10, xmm11); 1074 TestImpl(xmm11, xmm12); 1075 TestImpl(xmm12, xmm13); 1076 TestImpl(xmm13, xmm14); 1077 TestImpl(xmm14, xmm15); 1078 TestImpl(xmm15, xmm0); 1079 1080#undef TestImpl 1081#undef TestImplSingleXmmXmmUntyped 1082#undef TestImplSingleXmmAddr 1083#undef TestImplSingleXmmXmm 1084} 1085 1086TEST_F(AssemblerX8664Test, Punpckl) { 1087 const Dqword V0_v4i32(uint64_t(0x1111111122222222ull), 1088 uint64_t(0x5555555577777777ull)); 1089 const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull), 1090 uint64_t(0xCCCCCCCCDDDDDDDDull)); 1091 const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull), 1092 uint64_t(0xAAAAAAAA11111111ull)); 1093 1094 const Dqword V0_v8i16(uint64_t(0x1111222233334444ull), 1095 uint64_t(0x5555666677778888ull)); 1096 const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull), 1097 uint64_t(0xEEEEFFFF00009999ull)); 1098 const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull), 1099 uint64_t(0xAAAA1111BBBB2222ull)); 1100 1101 const Dqword V0_v16i8(uint64_t(0x1122334455667788ull), 1102 uint64_t(0x99AABBCCDDEEFF00ull)); 1103 const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull), 1104 uint64_t(0xBAADF00DFEEDFACEull)); 1105 const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull), 1106 uint64_t(0xFF11EE22DD33CC44ull)); 1107 1108#define TestImplXmmXmm(Dst, Src, Inst, Ty) \ 1109 do { \ 1110 static constexpr char TestString[] = \ 1111 "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ 1112 const uint32_t T0 = allocateDqword(); \ 1113 const uint32_t T1 = allocateDqword(); \ 1114 \ 1115 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1116 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1117 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ 1118 XmmRegister::Encoded_Reg_##Src); \ 1119 \ 1120 AssembledTest test = assemble(); \ 1121 test.setDqwordTo(T0, V0_##Ty); \ 1122 test.setDqwordTo(T1, V1_##Ty); \ 1123 test.run(); \ 1124 \ 1125 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1126 reset(); \ 1127 } while (0) 1128 1129#define TestImplXmmAddr(Dst, Inst, Ty) \ 1130 do { \ 1131 static constexpr char TestString[] = \ 1132 "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ 1133 const uint32_t T0 = allocateDqword(); \ 1134 const uint32_t T1 = allocateDqword(); \ 1135 \ 1136 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1137 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1138 \ 1139 AssembledTest test = assemble(); \ 1140 test.setDqwordTo(T0, V0_##Ty); \ 1141 test.setDqwordTo(T1, V1_##Ty); \ 1142 test.run(); \ 1143 \ 1144 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1145 reset(); \ 1146 } while (0) 1147 1148#define TestImpl(Dst, Src) \ 1149 do { \ 1150 TestImplXmmXmm(Dst, Src, punpckl, v4i32); \ 1151 TestImplXmmAddr(Dst, punpckl, v4i32); \ 1152 TestImplXmmXmm(Dst, Src, punpckl, v8i16); \ 1153 TestImplXmmAddr(Dst, punpckl, v8i16); \ 1154 TestImplXmmXmm(Dst, Src, punpckl, v16i8); \ 1155 TestImplXmmAddr(Dst, punpckl, v16i8); \ 1156 } while (0) 1157 1158 TestImpl(xmm0, xmm1); 1159 TestImpl(xmm1, xmm2); 1160 TestImpl(xmm2, xmm3); 1161 TestImpl(xmm3, xmm4); 1162 TestImpl(xmm4, xmm5); 1163 TestImpl(xmm5, xmm6); 1164 TestImpl(xmm6, xmm7); 1165 TestImpl(xmm7, xmm0); 1166 1167#undef TestImpl 1168#undef TestImplXmmAddr 1169#undef TestImplXmmXmm 1170} 1171 1172TEST_F(AssemblerX8664Test, Packss) { 1173 const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), 1174 uint64_t(0x7FFFFFFF80000000ull)); 1175 const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), 1176 uint64_t(0x0000800100007FFEull)); 1177 const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull), 1178 uint64_t(0x7FFF7FFEFFFEFFFFull)); 1179 1180 const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), 1181 uint64_t(0xFFFEFFFF7FFF8000ull)); 1182 const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), 1183 uint64_t(0x0088007700660055ull)); 1184 const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull), 1185 uint64_t(0x7F776655057F7F7Eull)); 1186 1187#define TestImplXmmXmm(Dst, Src, Inst, Ty) \ 1188 do { \ 1189 static constexpr char TestString[] = \ 1190 "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ 1191 const uint32_t T0 = allocateDqword(); \ 1192 const uint32_t T1 = allocateDqword(); \ 1193 \ 1194 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1195 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1196 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ 1197 XmmRegister::Encoded_Reg_##Src); \ 1198 \ 1199 AssembledTest test = assemble(); \ 1200 test.setDqwordTo(T0, V0_##Ty); \ 1201 test.setDqwordTo(T1, V1_##Ty); \ 1202 test.run(); \ 1203 \ 1204 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1205 reset(); \ 1206 } while (0) 1207 1208#define TestImplXmmAddr(Dst, Inst, Ty) \ 1209 do { \ 1210 static constexpr char TestString[] = \ 1211 "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ 1212 const uint32_t T0 = allocateDqword(); \ 1213 const uint32_t T1 = allocateDqword(); \ 1214 \ 1215 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1216 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1217 \ 1218 AssembledTest test = assemble(); \ 1219 test.setDqwordTo(T0, V0_##Ty); \ 1220 test.setDqwordTo(T1, V1_##Ty); \ 1221 test.run(); \ 1222 \ 1223 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1224 reset(); \ 1225 } while (0) 1226 1227#define TestImpl(Dst, Src) \ 1228 do { \ 1229 TestImplXmmXmm(Dst, Src, packss, v4i32); \ 1230 TestImplXmmAddr(Dst, packss, v4i32); \ 1231 TestImplXmmXmm(Dst, Src, packss, v8i16); \ 1232 TestImplXmmAddr(Dst, packss, v8i16); \ 1233 } while (0) 1234 1235 TestImpl(xmm0, xmm1); 1236 TestImpl(xmm1, xmm2); 1237 TestImpl(xmm2, xmm3); 1238 TestImpl(xmm3, xmm4); 1239 TestImpl(xmm4, xmm5); 1240 TestImpl(xmm5, xmm6); 1241 TestImpl(xmm6, xmm7); 1242 TestImpl(xmm7, xmm0); 1243 1244#undef TestImpl 1245#undef TestImplXmmAddr 1246#undef TestImplXmmXmm 1247} 1248 1249TEST_F(AssemblerX8664Test, Packus) { 1250 const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), 1251 uint64_t(0x7FFFFFFF80000000ull)); 1252 const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), 1253 uint64_t(0x0000800100007FFEull)); 1254 const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull), 1255 uint64_t(0x80017FFE00000000ull)); 1256 1257 const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), 1258 uint64_t(0xFFFEFFFF7FFF8000ull)); 1259 const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), 1260 uint64_t(0x0088007700660055ull)); 1261 const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull), 1262 uint64_t(0x8877665505FF817Eull)); 1263 1264#define TestImplXmmXmm(Dst, Src, Inst, Ty) \ 1265 do { \ 1266 static constexpr char TestString[] = \ 1267 "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ 1268 const uint32_t T0 = allocateDqword(); \ 1269 const uint32_t T1 = allocateDqword(); \ 1270 \ 1271 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1272 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1273 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ 1274 XmmRegister::Encoded_Reg_##Src); \ 1275 \ 1276 AssembledTest test = assemble(); \ 1277 test.setDqwordTo(T0, V0_##Ty); \ 1278 test.setDqwordTo(T1, V1_##Ty); \ 1279 test.run(); \ 1280 \ 1281 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1282 reset(); \ 1283 } while (0) 1284 1285#define TestImplXmmAddr(Dst, Inst, Ty) \ 1286 do { \ 1287 static constexpr char TestString[] = \ 1288 "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ 1289 const uint32_t T0 = allocateDqword(); \ 1290 const uint32_t T1 = allocateDqword(); \ 1291 \ 1292 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1293 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1294 \ 1295 AssembledTest test = assemble(); \ 1296 test.setDqwordTo(T0, V0_##Ty); \ 1297 test.setDqwordTo(T1, V1_##Ty); \ 1298 test.run(); \ 1299 \ 1300 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1301 reset(); \ 1302 } while (0) 1303 1304#define TestImpl(Dst, Src) \ 1305 do { \ 1306 TestImplXmmXmm(Dst, Src, packus, v4i32); \ 1307 TestImplXmmAddr(Dst, packus, v4i32); \ 1308 TestImplXmmXmm(Dst, Src, packus, v8i16); \ 1309 TestImplXmmAddr(Dst, packus, v8i16); \ 1310 } while (0) 1311 1312 TestImpl(xmm0, xmm1); 1313 TestImpl(xmm1, xmm2); 1314 TestImpl(xmm2, xmm3); 1315 TestImpl(xmm3, xmm4); 1316 TestImpl(xmm4, xmm5); 1317 TestImpl(xmm5, xmm6); 1318 TestImpl(xmm6, xmm7); 1319 TestImpl(xmm7, xmm0); 1320 1321#undef TestImpl 1322#undef TestImplXmmAddr 1323#undef TestImplXmmXmm 1324} 1325 1326TEST_F(AssemblerX8664Test, Pshufb) { 1327 const Dqword V0(uint64_t(0x1122334455667788ull), 1328 uint64_t(0x99aabbccddeeff32ull)); 1329 const Dqword V1(uint64_t(0x0204050380060708ull), 1330 uint64_t(0x010306080a8b0c0dull)); 1331 1332 const Dqword Expected(uint64_t(0x6644335500221132ull), 1333 uint64_t(0x77552232ee00ccbbull)); 1334 1335#define TestImplXmmXmm(Dst, Src, Inst) \ 1336 do { \ 1337 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ 1338 const uint32_t T0 = allocateDqword(); \ 1339 const uint32_t T1 = allocateDqword(); \ 1340 \ 1341 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1342 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1343 __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \ 1344 XmmRegister::Encoded_Reg_##Src); \ 1345 \ 1346 AssembledTest test = assemble(); \ 1347 test.setDqwordTo(T0, V0); \ 1348 test.setDqwordTo(T1, V1); \ 1349 test.run(); \ 1350 \ 1351 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1352 reset(); \ 1353 } while (0) 1354 1355#define TestImplXmmAddr(Dst, Inst) \ 1356 do { \ 1357 static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \ 1358 const uint32_t T0 = allocateDqword(); \ 1359 const uint32_t T1 = allocateDqword(); \ 1360 \ 1361 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1362 __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1363 \ 1364 AssembledTest test = assemble(); \ 1365 test.setDqwordTo(T0, V0); \ 1366 test.setDqwordTo(T1, V1); \ 1367 test.run(); \ 1368 \ 1369 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1370 reset(); \ 1371 } while (0) 1372 1373#define TestImpl(Dst, Src) \ 1374 do { \ 1375 TestImplXmmXmm(Dst, Src, pshufb); \ 1376 TestImplXmmAddr(Dst, pshufb); \ 1377 } while (0) 1378 1379 TestImpl(xmm0, xmm1); 1380 TestImpl(xmm1, xmm2); 1381 TestImpl(xmm2, xmm3); 1382 TestImpl(xmm3, xmm4); 1383 TestImpl(xmm4, xmm5); 1384 TestImpl(xmm5, xmm6); 1385 TestImpl(xmm6, xmm7); 1386 TestImpl(xmm7, xmm8); 1387 TestImpl(xmm8, xmm9); 1388 TestImpl(xmm9, xmm10); 1389 TestImpl(xmm10, xmm11); 1390 TestImpl(xmm11, xmm12); 1391 TestImpl(xmm12, xmm13); 1392 TestImpl(xmm13, xmm14); 1393 TestImpl(xmm14, xmm15); 1394 TestImpl(xmm15, xmm0); 1395 1396#undef TestImpl 1397#undef TestImplXmmAddr 1398#undef TestImplXmmXmm 1399} 1400 1401TEST_F(AssemblerX8664Test, Cvt) { 1402 const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1403 const Dqword dq2ps32SrcValue(-5, 3, 100, 200); 1404 const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0); 1405 1406 const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f); 1407 const Dqword dq2ps64SrcValue(-5, 3, 100, 200); 1408 const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0); 1409 1410 const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1411 const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0); 1412 const Dqword tps2dq32Expected(-5, 3, 100, 200); 1413 1414 const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1415 const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0); 1416 const Dqword tps2dq64Expected(-5, 3, 100, 200); 1417 1418 const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1419 const int32_t si2ss32SrcValue = 5; 1420 const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f); 1421 1422 const Dqword si2ss64DstValue(-1.0, -1.0); 1423 const int32_t si2ss64SrcValue = 5; 1424 const Dqword si2ss64Expected(5.0, -1.0); 1425 1426 const int32_t tss2si32DstValue = 0xF00F0FF0; 1427 const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f); 1428 const int32_t tss2si32Expected = -5; 1429 1430 const int32_t tss2si64DstValue = 0xF00F0FF0; 1431 const Dqword tss2si64SrcValue(-5.0, -1.0); 1432 const int32_t tss2si64Expected = -5; 1433 1434 const Dqword float2float32DstValue(-1.0, -1.0); 1435 const Dqword float2float32SrcValue(-5.0, 3, 100, 200); 1436 const Dqword float2float32Expected(-5.0, -1.0); 1437 1438 const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0); 1439 const Dqword float2float64SrcValue(-5.0, 3.0); 1440 const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0); 1441 1442#define TestImplPXmmXmm(Dst, Src, Inst, Size) \ 1443 do { \ 1444 static constexpr char TestString[] = \ 1445 "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")"; \ 1446 const uint32_t T0 = allocateDqword(); \ 1447 const uint32_t T1 = allocateDqword(); \ 1448 \ 1449 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1450 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 1451 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 1452 \ 1453 AssembledTest test = assemble(); \ 1454 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1455 test.setDqwordTo(T1, Inst##Size##SrcValue); \ 1456 test.run(); \ 1457 \ 1458 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1459 reset(); \ 1460 } while (0) 1461 1462#define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType) \ 1463 do { \ 1464 static constexpr char TestString[] = \ 1465 "(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")"; \ 1466 const uint32_t T0 = allocateDqword(); \ 1467 \ 1468 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1469 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \ 1470 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \ 1471 Encoded_GPR_##GPR()); \ 1472 \ 1473 AssembledTest test = assemble(); \ 1474 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1475 test.run(); \ 1476 \ 1477 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1478 reset(); \ 1479 } while (0) 1480 1481#define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size) \ 1482 do { \ 1483 static constexpr char TestString[] = \ 1484 "(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")"; \ 1485 const uint32_t T0 = allocateDqword(); \ 1486 \ 1487 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \ 1488 __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \ 1489 __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \ 1490 Encoded_Xmm_##Src()); \ 1491 \ 1492 AssembledTest test = assemble(); \ 1493 test.setDqwordTo(T0, Inst##Size##SrcValue); \ 1494 test.run(); \ 1495 \ 1496 ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \ 1497 test.GPR()) \ 1498 << TestString; \ 1499 reset(); \ 1500 } while (0) 1501 1502#define TestImplPXmmAddr(Dst, Inst, Size) \ 1503 do { \ 1504 static constexpr char TestString[] = \ 1505 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \ 1506 const uint32_t T0 = allocateDqword(); \ 1507 const uint32_t T1 = allocateDqword(); \ 1508 \ 1509 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1510 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 1511 \ 1512 AssembledTest test = assemble(); \ 1513 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1514 test.setDqwordTo(T1, Inst##Size##SrcValue); \ 1515 test.run(); \ 1516 \ 1517 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1518 reset(); \ 1519 } while (0) 1520 1521#define TestImplSXmmAddr(Dst, Inst, Size, IntType) \ 1522 do { \ 1523 static constexpr char TestString[] = \ 1524 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")"; \ 1525 const uint32_t T0 = allocateDqword(); \ 1526 const uint32_t T1 = allocateDword(); \ 1527 \ 1528 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1529 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \ 1530 dwordAddress(T1)); \ 1531 \ 1532 AssembledTest test = assemble(); \ 1533 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1534 test.setDwordTo(T1, Inst##Size##SrcValue); \ 1535 test.run(); \ 1536 \ 1537 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1538 reset(); \ 1539 } while (0) 1540 1541#define TestImplSRegAddr(GPR, Inst, IntSize, Size) \ 1542 do { \ 1543 static constexpr char TestString[] = \ 1544 "(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")"; \ 1545 const uint32_t T0 = allocateDqword(); \ 1546 \ 1547 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \ 1548 __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \ 1549 dwordAddress(T0)); \ 1550 \ 1551 AssembledTest test = assemble(); \ 1552 test.setDqwordTo(T0, Inst##Size##SrcValue); \ 1553 test.run(); \ 1554 \ 1555 ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \ 1556 test.GPR()) \ 1557 << TestString; \ 1558 reset(); \ 1559 } while (0) 1560 1561#define TestImplSize(Dst, Src, GPR, Size) \ 1562 do { \ 1563 TestImplPXmmXmm(Dst, Src, dq2ps, Size); \ 1564 TestImplPXmmAddr(Src, dq2ps, Size); \ 1565 TestImplPXmmXmm(Dst, Src, tps2dq, Size); \ 1566 TestImplPXmmAddr(Src, tps2dq, Size); \ 1567 TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32); \ 1568 TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64); \ 1569 TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32); \ 1570 TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64); \ 1571 TestImplSRegXmm(GPR, Src, tss2si, 32, Size); \ 1572 TestImplSRegXmm(GPR, Src, tss2si, 64, Size); \ 1573 TestImplSRegAddr(GPR, tss2si, 32, Size); \ 1574 TestImplSRegAddr(GPR, tss2si, 64, Size); \ 1575 TestImplPXmmXmm(Dst, Src, float2float, Size); \ 1576 TestImplPXmmAddr(Src, float2float, Size); \ 1577 } while (0) 1578 1579#define TestImpl(Dst, Src, GPR) \ 1580 do { \ 1581 TestImplSize(Dst, Src, GPR, 32); \ 1582 TestImplSize(Dst, Src, GPR, 64); \ 1583 } while (0) 1584 1585 TestImpl(xmm0, xmm1, r1); 1586 TestImpl(xmm1, xmm2, r2); 1587 TestImpl(xmm2, xmm3, r3); 1588 TestImpl(xmm3, xmm4, r4); 1589 TestImpl(xmm4, xmm5, r5); 1590 TestImpl(xmm5, xmm6, r6); 1591 TestImpl(xmm6, xmm7, r7); 1592 TestImpl(xmm7, xmm8, r8); 1593 TestImpl(xmm8, xmm9, r10); 1594 TestImpl(xmm9, xmm10, r11); 1595 TestImpl(xmm10, xmm11, r12); 1596 TestImpl(xmm11, xmm12, r13); 1597 TestImpl(xmm12, xmm13, r14); 1598 TestImpl(xmm13, xmm14, r15); 1599 TestImpl(xmm14, xmm15, r1); 1600 TestImpl(xmm15, xmm0, r2); 1601 1602#undef TestImpl 1603#undef TestImplSize 1604#undef TestImplSRegAddr 1605#undef TestImplSXmmAddr 1606#undef TestImplPXmmAddr 1607#undef TestImplSRegXmm 1608#undef TestImplSXmmReg 1609#undef TestImplPXmmXmm 1610} 1611 1612TEST_F(AssemblerX8664Test, Ucomiss) { 1613 static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN(); 1614 static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN(); 1615 1616 Dqword test32DstValue(0.0, qnan32, qnan32, qnan32); 1617 Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32); 1618 1619 Dqword test64DstValue(0.0, qnan64); 1620 Dqword test64SrcValue(0.0, qnan64); 1621 1622#define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, \ 1623 BOther) \ 1624 do { \ 1625 static constexpr char NearBranch = AssemblerX8664::kNearJump; \ 1626 static constexpr char TestString[] = \ 1627 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \ 1628 ", " #BParity ", " #BOther ")"; \ 1629 const uint32_t T0 = allocateDqword(); \ 1630 test##Size##DstValue.F##Size[0] = Value0; \ 1631 const uint32_t T1 = allocateDqword(); \ 1632 test##Size##SrcValue.F##Size[0] = Value1; \ 1633 const uint32_t ImmIfTrue = 0xBEEF; \ 1634 const uint32_t ImmIfFalse = 0xC0FFE; \ 1635 \ 1636 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1637 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 1638 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \ 1639 __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 1640 Label Done; \ 1641 __ j(Cond::Br_##BParity, &Done, NearBranch); \ 1642 __ j(Cond::Br_##BOther, &Done, NearBranch); \ 1643 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \ 1644 __ bind(&Done); \ 1645 \ 1646 AssembledTest test = assemble(); \ 1647 test.setDqwordTo(T0, test##Size##DstValue); \ 1648 test.setDqwordTo(T1, test##Size##SrcValue); \ 1649 test.run(); \ 1650 \ 1651 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \ 1652 reset(); \ 1653 } while (0) 1654 1655#define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther) \ 1656 do { \ 1657 static constexpr char NearBranch = AssemblerX8664::kNearJump; \ 1658 static constexpr char TestString[] = \ 1659 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType \ 1660 ", " #BParity ", " #BOther ")"; \ 1661 const uint32_t T0 = allocateDqword(); \ 1662 test##Size##DstValue.F##Size[0] = Value0; \ 1663 const uint32_t T1 = allocateDqword(); \ 1664 test##Size##SrcValue.F##Size[0] = Value1; \ 1665 const uint32_t ImmIfTrue = 0xBEEF; \ 1666 const uint32_t ImmIfFalse = 0xC0FFE; \ 1667 \ 1668 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1669 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \ 1670 __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 1671 Label Done; \ 1672 __ j(Cond::Br_##BParity, &Done, NearBranch); \ 1673 __ j(Cond::Br_##BOther, &Done, NearBranch); \ 1674 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \ 1675 __ bind(&Done); \ 1676 \ 1677 AssembledTest test = assemble(); \ 1678 test.setDqwordTo(T0, test##Size##DstValue); \ 1679 test.setDqwordTo(T1, test##Size##SrcValue); \ 1680 test.run(); \ 1681 \ 1682 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \ 1683 reset(); \ 1684 } while (0) 1685 1686#define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity, \ 1687 BOther) \ 1688 do { \ 1689 TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \ 1690 TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther); \ 1691 } while (0) 1692 1693#define TestImplSize(Dst, Src, Size) \ 1694 do { \ 1695 TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne); \ 1696 TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e); \ 1697 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a); \ 1698 TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a); \ 1699 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae); \ 1700 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b); \ 1701 TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b); \ 1702 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be); \ 1703 TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o); \ 1704 TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s); \ 1705 TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s); \ 1706 } while (0) 1707 1708#define TestImpl(Dst, Src) \ 1709 do { \ 1710 TestImplSize(Dst, Src, 32); \ 1711 TestImplSize(Dst, Src, 64); \ 1712 } while (0) 1713 1714 TestImpl(xmm0, xmm1); 1715 TestImpl(xmm1, xmm2); 1716 TestImpl(xmm2, xmm3); 1717 TestImpl(xmm3, xmm4); 1718 TestImpl(xmm4, xmm5); 1719 TestImpl(xmm5, xmm6); 1720 TestImpl(xmm6, xmm7); 1721 TestImpl(xmm7, xmm8); 1722 TestImpl(xmm8, xmm9); 1723 TestImpl(xmm9, xmm10); 1724 TestImpl(xmm10, xmm11); 1725 TestImpl(xmm11, xmm12); 1726 TestImpl(xmm12, xmm13); 1727 TestImpl(xmm13, xmm14); 1728 TestImpl(xmm14, xmm15); 1729 TestImpl(xmm15, xmm0); 1730 1731#undef TestImpl 1732#undef TestImplSize 1733#undef TestImplCond 1734#undef TestImplXmmAddr 1735#undef TestImplXmmXmm 1736} 1737 1738TEST_F(AssemblerX8664Test, Sqrtss) { 1739 Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0); 1740 Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0); 1741 1742 Dqword test64SrcValue(-100.0, -100.0); 1743 Dqword test64DstValue(-1.0, -1.0); 1744 1745#define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size) \ 1746 do { \ 1747 static constexpr char TestString[] = \ 1748 "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")"; \ 1749 const uint32_t T0 = allocateDqword(); \ 1750 test##Size##SrcValue.F##Size[0] = Value1; \ 1751 const uint32_t T1 = allocateDqword(); \ 1752 \ 1753 __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \ 1754 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 1755 __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 1756 \ 1757 AssembledTest test = assemble(); \ 1758 test.setDqwordTo(T0, test##Size##SrcValue); \ 1759 test.setDqwordTo(T1, test##Size##DstValue); \ 1760 test.run(); \ 1761 \ 1762 Dqword Expected = test##Size##DstValue; \ 1763 Expected.F##Size[0] = Result; \ 1764 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1765 reset(); \ 1766 } while (0) 1767 1768#define TestSqrtssXmmAddr(Dst, Value1, Result, Size) \ 1769 do { \ 1770 static constexpr char TestString[] = \ 1771 "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")"; \ 1772 const uint32_t T0 = allocateDqword(); \ 1773 test##Size##SrcValue.F##Size[0] = Value1; \ 1774 const uint32_t T1 = allocateDqword(); \ 1775 \ 1776 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 1777 __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1778 \ 1779 AssembledTest test = assemble(); \ 1780 test.setDqwordTo(T0, test##Size##SrcValue); \ 1781 test.setDqwordTo(T1, test##Size##DstValue); \ 1782 test.run(); \ 1783 \ 1784 Dqword Expected = test##Size##DstValue; \ 1785 Expected.F##Size[0] = Result; \ 1786 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1787 reset(); \ 1788 } while (0) 1789 1790#define TestSqrtssSize(Dst, Src, Size) \ 1791 do { \ 1792 TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size); \ 1793 TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size); \ 1794 TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size); \ 1795 TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size); \ 1796 TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size); \ 1797 TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size); \ 1798 } while (0) 1799 1800#define TestSqrtss(Dst, Src) \ 1801 do { \ 1802 TestSqrtssSize(Dst, Src, 32); \ 1803 TestSqrtssSize(Dst, Src, 64); \ 1804 } while (0) 1805 1806 TestSqrtss(xmm0, xmm1); 1807 TestSqrtss(xmm1, xmm2); 1808 TestSqrtss(xmm2, xmm3); 1809 TestSqrtss(xmm3, xmm4); 1810 TestSqrtss(xmm4, xmm5); 1811 TestSqrtss(xmm5, xmm6); 1812 TestSqrtss(xmm6, xmm7); 1813 TestSqrtss(xmm7, xmm8); 1814 TestSqrtss(xmm8, xmm9); 1815 TestSqrtss(xmm9, xmm10); 1816 TestSqrtss(xmm10, xmm11); 1817 TestSqrtss(xmm11, xmm12); 1818 TestSqrtss(xmm12, xmm13); 1819 TestSqrtss(xmm13, xmm14); 1820 TestSqrtss(xmm14, xmm15); 1821 TestSqrtss(xmm15, xmm0); 1822 1823#undef TestSqrtss 1824#undef TestSqrtssSize 1825#undef TestSqrtssXmmAddr 1826#undef TestSqrtssXmmXmm 1827} 1828 1829TEST_F(AssemblerX8664Test, Insertps) { 1830#define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected) \ 1831 do { \ 1832 static constexpr char TestString[] = \ 1833 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected \ 1834 ")"; \ 1835 const uint32_t T0 = allocateDqword(); \ 1836 const Dqword V0 Value0; \ 1837 const uint32_t T1 = allocateDqword(); \ 1838 const Dqword V1 Value1; \ 1839 \ 1840 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1841 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 1842 __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ 1843 Immediate(Imm)); \ 1844 \ 1845 AssembledTest test = assemble(); \ 1846 test.setDqwordTo(T0, V0); \ 1847 test.setDqwordTo(T1, V1); \ 1848 test.run(); \ 1849 \ 1850 ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \ 1851 reset(); \ 1852 } while (0) 1853 1854#define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected) \ 1855 do { \ 1856 static constexpr char TestString[] = \ 1857 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \ 1858 const uint32_t T0 = allocateDqword(); \ 1859 const Dqword V0 Value0; \ 1860 const uint32_t T1 = allocateDqword(); \ 1861 const Dqword V1 Value1; \ 1862 \ 1863 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1864 __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \ 1865 Immediate(Imm)); \ 1866 \ 1867 AssembledTest test = assemble(); \ 1868 test.setDqwordTo(T0, V0); \ 1869 test.setDqwordTo(T1, V1); \ 1870 test.run(); \ 1871 \ 1872 ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \ 1873 reset(); \ 1874 } while (0) 1875 1876#define TestInsertps(Dst, Src) \ 1877 do { \ 1878 TestInsertpsXmmXmmImm( \ 1879 Dst, (uint64_t(-1), uint64_t(-1)), Src, \ 1880 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1881 0x99, \ 1882 (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull))); \ 1883 TestInsertpsXmmAddrImm( \ 1884 Dst, (uint64_t(-1), uint64_t(-1)), \ 1885 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1886 0x99, \ 1887 (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull))); \ 1888 TestInsertpsXmmXmmImm( \ 1889 Dst, (uint64_t(-1), uint64_t(-1)), Src, \ 1890 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1891 0x9D, \ 1892 (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull))); \ 1893 TestInsertpsXmmAddrImm( \ 1894 Dst, (uint64_t(-1), uint64_t(-1)), \ 1895 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1896 0x9D, \ 1897 (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull))); \ 1898 } while (0) 1899 1900 TestInsertps(xmm0, xmm1); 1901 TestInsertps(xmm1, xmm2); 1902 TestInsertps(xmm2, xmm3); 1903 TestInsertps(xmm3, xmm4); 1904 TestInsertps(xmm4, xmm5); 1905 TestInsertps(xmm5, xmm6); 1906 TestInsertps(xmm6, xmm7); 1907 TestInsertps(xmm7, xmm8); 1908 TestInsertps(xmm8, xmm9); 1909 TestInsertps(xmm9, xmm10); 1910 TestInsertps(xmm10, xmm11); 1911 TestInsertps(xmm11, xmm12); 1912 TestInsertps(xmm12, xmm13); 1913 TestInsertps(xmm13, xmm14); 1914 TestInsertps(xmm14, xmm15); 1915 TestInsertps(xmm15, xmm0); 1916 1917#undef TestInsertps 1918#undef TestInsertpsXmmXmmAddr 1919#undef TestInsertpsXmmXmmImm 1920} 1921 1922TEST_F(AssemblerX8664Test, Pinsr) { 1923 static constexpr uint8_t Mask32 = 0x03; 1924 static constexpr uint8_t Mask16 = 0x07; 1925 static constexpr uint8_t Mask8 = 0x0F; 1926 1927#define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size) \ 1928 do { \ 1929 static constexpr char TestString[] = \ 1930 "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \ 1931 const uint32_t T0 = allocateDqword(); \ 1932 const Dqword V0 Value0; \ 1933 \ 1934 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1935 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Value1)); \ 1936 __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR(), \ 1937 Immediate(Imm)); \ 1938 \ 1939 AssembledTest test = assemble(); \ 1940 test.setDqwordTo(T0, V0); \ 1941 test.run(); \ 1942 \ 1943 constexpr uint8_t sel = (Imm)&Mask##Size; \ 1944 Dqword Expected = V0; \ 1945 Expected.U##Size[sel] = Value1; \ 1946 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1947 reset(); \ 1948 } while (0) 1949 1950#define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size) \ 1951 do { \ 1952 static constexpr char TestString[] = \ 1953 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")"; \ 1954 const uint32_t T0 = allocateDqword(); \ 1955 const Dqword V0 Value0; \ 1956 const uint32_t T1 = allocateDword(); \ 1957 const uint32_t V1 = Value1; \ 1958 \ 1959 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 1960 __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1), \ 1961 Immediate(Imm)); \ 1962 \ 1963 AssembledTest test = assemble(); \ 1964 test.setDqwordTo(T0, V0); \ 1965 test.setDwordTo(T1, V1); \ 1966 test.run(); \ 1967 \ 1968 constexpr uint8_t sel = (Imm)&Mask##Size; \ 1969 Dqword Expected = V0; \ 1970 Expected.U##Size[sel] = Value1; \ 1971 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1972 reset(); \ 1973 } while (0) 1974 1975#define TestPinsrSize(Dst, GPR, Value1, Imm, Size) \ 1976 do { \ 1977 TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \ 1978 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ 1979 GPR, Value1, Imm, Size); \ 1980 TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \ 1981 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ 1982 Value1, Imm, Size); \ 1983 } while (0) 1984 1985#define TestPinsr(Src, Dst) \ 1986 do { \ 1987 TestPinsrSize(Src, Dst, 0xEE, 0x03, 8); \ 1988 TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16); \ 1989 TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32); \ 1990 } while (0) 1991 1992 TestPinsr(xmm0, r1); 1993 TestPinsr(xmm1, r2); 1994 TestPinsr(xmm2, r3); 1995 TestPinsr(xmm3, r4); 1996 TestPinsr(xmm4, r5); 1997 TestPinsr(xmm5, r6); 1998 TestPinsr(xmm6, r7); 1999 TestPinsr(xmm7, r8); 2000 TestPinsr(xmm8, r10); 2001 TestPinsr(xmm9, r11); 2002 TestPinsr(xmm10, r12); 2003 TestPinsr(xmm11, r13); 2004 TestPinsr(xmm12, r14); 2005 TestPinsr(xmm13, r15); 2006 TestPinsr(xmm14, r1); 2007 TestPinsr(xmm15, r2); 2008 2009#undef TestPinsr 2010#undef TestPinsrSize 2011#undef TestPinsrXmmAddrImm 2012#undef TestPinsrXmmGPRImm 2013} 2014 2015TEST_F(AssemblerX8664Test, Pextr) { 2016 static constexpr uint8_t Mask32 = 0x03; 2017 static constexpr uint8_t Mask16 = 0x07; 2018 static constexpr uint8_t Mask8 = 0x0F; 2019 2020#define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size) \ 2021 do { \ 2022 static constexpr char TestString[] = \ 2023 "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")"; \ 2024 const uint32_t T0 = allocateDqword(); \ 2025 const Dqword V0 Value1; \ 2026 \ 2027 __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \ 2028 __ pextr(IceType_i##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src(), \ 2029 Immediate(Imm)); \ 2030 \ 2031 AssembledTest test = assemble(); \ 2032 test.setDqwordTo(T0, V0); \ 2033 test.run(); \ 2034 \ 2035 constexpr uint8_t sel = (Imm)&Mask##Size; \ 2036 ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString; \ 2037 reset(); \ 2038 } while (0) 2039 2040#define TestPextrSize(GPR, Src, Value1, Imm, Size) \ 2041 do { \ 2042 TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull), \ 2043 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ 2044 Imm, Size); \ 2045 } while (0) 2046 2047#define TestPextr(Src, Dst) \ 2048 do { \ 2049 TestPextrSize(Src, Dst, 0xEE, 0x03, 8); \ 2050 TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16); \ 2051 TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32); \ 2052 } while (0) 2053 2054 TestPextr(r1, xmm0); 2055 TestPextr(r2, xmm1); 2056 TestPextr(r3, xmm2); 2057 TestPextr(r4, xmm3); 2058 TestPextr(r5, xmm4); 2059 TestPextr(r6, xmm5); 2060 TestPextr(r7, xmm6); 2061 TestPextr(r8, xmm7); 2062 TestPextr(r10, xmm8); 2063 TestPextr(r11, xmm9); 2064 TestPextr(r12, xmm10); 2065 TestPextr(r13, xmm11); 2066 TestPextr(r14, xmm12); 2067 TestPextr(r15, xmm13); 2068 TestPextr(r1, xmm14); 2069 TestPextr(r2, xmm15); 2070 2071#undef TestPextr 2072#undef TestPextrSize 2073#undef TestPextrXmmGPRImm 2074} 2075 2076TEST_F(AssemblerX8664Test, Pcmpeq_Pcmpgt) { 2077#define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op) \ 2078 do { \ 2079 static constexpr char TestString[] = \ 2080 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")"; \ 2081 const uint32_t T0 = allocateDqword(); \ 2082 const Dqword V0 Value0; \ 2083 const uint32_t T1 = allocateDqword(); \ 2084 const Dqword V1 Value1; \ 2085 \ 2086 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 2087 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 2088 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ 2089 \ 2090 AssembledTest test = assemble(); \ 2091 test.setDqwordTo(T0, V0); \ 2092 test.setDqwordTo(T1, V1); \ 2093 test.run(); \ 2094 \ 2095 Dqword Expected(uint64_t(0), uint64_t(0)); \ 2096 static constexpr uint8_t ArraySize = \ 2097 sizeof(Dqword) / sizeof(uint##Size##_t); \ 2098 for (uint8_t i = 0; i < ArraySize; ++i) { \ 2099 Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \ 2100 } \ 2101 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 2102 reset(); \ 2103 } while (0) 2104 2105#define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op) \ 2106 do { \ 2107 static constexpr char TestString[] = \ 2108 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")"; \ 2109 const uint32_t T0 = allocateDqword(); \ 2110 const Dqword V0 Value0; \ 2111 const uint32_t T1 = allocateDqword(); \ 2112 const Dqword V1 Value1; \ 2113 \ 2114 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 2115 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ 2116 \ 2117 AssembledTest test = assemble(); \ 2118 test.setDqwordTo(T0, V0); \ 2119 test.setDqwordTo(T1, V1); \ 2120 test.run(); \ 2121 \ 2122 Dqword Expected(uint64_t(0), uint64_t(0)); \ 2123 static constexpr uint8_t ArraySize = \ 2124 sizeof(Dqword) / sizeof(uint##Size##_t); \ 2125 for (uint8_t i = 0; i < ArraySize; ++i) { \ 2126 Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \ 2127 } \ 2128 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 2129 reset(); \ 2130 } while (0) 2131 2132#define TestPcmpValues(Dst, Value0, Src, Value1, Size) \ 2133 do { \ 2134 TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == ); \ 2135 TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == ); \ 2136 TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < ); \ 2137 TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < ); \ 2138 } while (0) 2139 2140#define TestPcmpSize(Dst, Src, Size) \ 2141 do { \ 2142 TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull), \ 2143 uint64_t(0x0000000000000000ull)), \ 2144 Src, (uint64_t(0x0000008800008800ull), \ 2145 uint64_t(0xFFFFFFFFFFFFFFFFull)), \ 2146 Size); \ 2147 TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull), \ 2148 uint64_t(0x12345abcde12345Aull)), \ 2149 Src, (uint64_t(0x0000008800008800ull), \ 2150 uint64_t(0xAABBCCDD1234321Aull)), \ 2151 Size); \ 2152 } while (0) 2153 2154#define TestPcmp(Dst, Src) \ 2155 do { \ 2156 TestPcmpSize(xmm0, xmm1, 8); \ 2157 TestPcmpSize(xmm0, xmm1, 16); \ 2158 TestPcmpSize(xmm0, xmm1, 32); \ 2159 } while (0) 2160 2161 TestPcmp(xmm0, xmm1); 2162 TestPcmp(xmm1, xmm2); 2163 TestPcmp(xmm2, xmm3); 2164 TestPcmp(xmm3, xmm4); 2165 TestPcmp(xmm4, xmm5); 2166 TestPcmp(xmm5, xmm6); 2167 TestPcmp(xmm6, xmm7); 2168 TestPcmp(xmm7, xmm8); 2169 TestPcmp(xmm8, xmm9); 2170 TestPcmp(xmm9, xmm10); 2171 TestPcmp(xmm10, xmm11); 2172 TestPcmp(xmm11, xmm12); 2173 TestPcmp(xmm12, xmm13); 2174 TestPcmp(xmm13, xmm14); 2175 TestPcmp(xmm14, xmm15); 2176 TestPcmp(xmm15, xmm0); 2177 2178#undef TestPcmp 2179#undef TestPcmpSize 2180#undef TestPcmpValues 2181#undef TestPcmpXmmAddr 2182#undef TestPcmpXmmXmm 2183} 2184 2185TEST_F(AssemblerX8664Test, Roundsd) { 2186#define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN) \ 2187 do { \ 2188 static constexpr char TestString[] = \ 2189 "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")"; \ 2190 const uint32_t T0 = allocateDqword(); \ 2191 const Dqword V0(-3.0, -3.0); \ 2192 const uint32_t T1 = allocateDqword(); \ 2193 const Dqword V1(double(Input), -123.4); \ 2194 \ 2195 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ 2196 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ 2197 __ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ 2198 Immediate(AssemblerX8664::k##Mode)); \ 2199 \ 2200 AssembledTest test = assemble(); \ 2201 test.setDqwordTo(T0, V0); \ 2202 test.setDqwordTo(T1, V1); \ 2203 test.run(); \ 2204 \ 2205 const Dqword Expected(double(RN), -3.0); \ 2206 EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 2207 reset(); \ 2208 } while (0) 2209 2210#define TestRoundsd(Dst, Src) \ 2211 do { \ 2212 TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6); \ 2213 TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5); \ 2214 TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5); \ 2215 TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6); \ 2216 TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5); \ 2217 TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5); \ 2218 } while (0) 2219 2220 TestRoundsd(xmm0, xmm1); 2221 TestRoundsd(xmm1, xmm2); 2222 TestRoundsd(xmm2, xmm3); 2223 TestRoundsd(xmm3, xmm4); 2224 TestRoundsd(xmm4, xmm5); 2225 TestRoundsd(xmm5, xmm6); 2226 TestRoundsd(xmm6, xmm7); 2227 TestRoundsd(xmm7, xmm8); 2228 TestRoundsd(xmm8, xmm9); 2229 TestRoundsd(xmm9, xmm10); 2230 TestRoundsd(xmm10, xmm11); 2231 TestRoundsd(xmm11, xmm12); 2232 TestRoundsd(xmm12, xmm13); 2233 TestRoundsd(xmm13, xmm14); 2234 TestRoundsd(xmm14, xmm15); 2235 TestRoundsd(xmm15, xmm0); 2236 2237#undef TestRoundsd 2238#undef TestRoundsdXmmXmm 2239} 2240 2241TEST_F(AssemblerX8664Test, Set1ps) { 2242#define TestImpl(Xmm, Src, Imm) \ 2243 do { \ 2244 __ set1ps(Encoded_Xmm_##Xmm(), Encoded_GPR_##Src(), Immediate(Imm)); \ 2245 \ 2246 AssembledTest test = assemble(); \ 2247 test.run(); \ 2248 \ 2249 const Dqword Expected((uint64_t(Imm) << 32) | uint32_t(Imm), \ 2250 (uint64_t(Imm) << 32) | uint32_t(Imm)); \ 2251 ASSERT_EQ(Expected, test.Xmm<Dqword>()) \ 2252 << "(" #Xmm ", " #Src ", " #Imm ")"; \ 2253 reset(); \ 2254 } while (0) 2255 2256 TestImpl(xmm0, r1, 1); 2257 TestImpl(xmm1, r2, 12); 2258 TestImpl(xmm2, r3, 22); 2259 TestImpl(xmm3, r4, 54); 2260 TestImpl(xmm4, r5, 80); 2261 TestImpl(xmm5, r6, 32); 2262 TestImpl(xmm6, r7, 55); 2263 TestImpl(xmm7, r8, 44); 2264 TestImpl(xmm8, r10, 10); 2265 TestImpl(xmm9, r11, 155); 2266 TestImpl(xmm10, r12, 165); 2267 TestImpl(xmm11, r13, 170); 2268 TestImpl(xmm12, r14, 200); 2269 TestImpl(xmm13, r15, 124); 2270 TestImpl(xmm14, r1, 101); 2271 TestImpl(xmm15, r2, 166); 2272 2273#undef TestImpl 2274} 2275 2276} // end of anonymous namespace 2277} // end of namespace Test 2278} // end of namespace X8664 2279} // end of namespace Ice 2280