1// Copyright 2015, ARM Limited 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are met: 6// 7// * Redistributions of source code must retain the above copyright notice, 8// this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above copyright notice, 10// this list of conditions and the following disclaimer in the documentation 11// and/or other materials provided with the distribution. 12// * Neither the name of ARM Limited nor the names of its contributors may be 13// used to endorse or promote products derived from this software without 14// specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <stdio.h>
#include <float.h>

#include "test-runner.h"
#include "test-utils-a64.h"
#include "test-simulator-inputs-a64.h"
#include "test-simulator-traces-a64.h"
#include "vixl/a64/macro-assembler-a64.h"
#include "vixl/a64/simulator-a64.h"

namespace vixl {

// ==== Simulator Tests ====
//
// These simulator tests check instruction behaviour against a trace taken from
// real AArch64 hardware. The same test code is used to generate the trace; the
// results are printed to stdout when the test is run with --sim_test_trace.
//
// The input lists and expected results are stored in test/traces. The expected
// results can be regenerated using tools/generate_simulator_traces.py. Adding
// a test for a new instruction is described at the top of
// test-simulator-traces-a64.h.

// Shorthand for emitting instructions through the test's MacroAssembler.
#define __ masm.
// Register each test with a "SIM_" prefix so the simulator tests can be
// selected as a group by the test runner.
#define TEST(name) TEST_(SIM_##name)

// Size, in bytes, of the code buffer each test assembles into.
#define BUF_SIZE (256)

#ifdef USE_SIMULATOR

// Build the MacroAssembler and a Simulator (or interactive Debugger, if
// requested) to run the generated code. The simulator allocated here is
// freed by TEARDOWN().
#define SETUP() \
  MacroAssembler masm(BUF_SIZE); \
  Decoder decoder; \
  Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder) \
                                              : new Simulator(&decoder); \
  simulator->set_coloured_trace(Test::coloured_trace()); \
  simulator->set_instruction_stats(Test::instruction_stats()); \

// Start generating test code: reset state, preserve the callee-saved
// registers, and emit any tracing/instrumentation requested on the command
// line.
#define START() \
  masm.Reset(); \
  simulator->ResetState(); \
  __ PushCalleeSavedRegisters(); \
  if (Test::trace_reg()) { \
    __ Trace(LOG_STATE, TRACE_ENABLE); \
  } \
  if (Test::trace_write()) { \
    __ Trace(LOG_WRITE, TRACE_ENABLE); \
  } \
  if (Test::trace_sim()) { \
    __ Trace(LOG_DISASM, TRACE_ENABLE); \
  } \
  if (Test::instruction_stats()) { \
    __ EnableInstrumentation(); \
  }

// Finish generating test code: undo what START() enabled, restore the
// callee-saved registers, return, and finalize the buffer.
#define END() \
  if (Test::instruction_stats()) { \
    __ DisableInstrumentation(); \
  } \
  __ Trace(LOG_ALL, TRACE_DISABLE); \
  __ PopCalleeSavedRegisters(); \
  __ Ret(); \
  masm.FinalizeCode()

// Execute the generated code on the simulator, from its start address.
#define RUN() \
  simulator->RunFrom(masm.GetStartAddress<Instruction*>())

#define TEARDOWN() \
  delete simulator;

#else  // USE_SIMULATOR

// Native (real hardware) variants of the macros above.
#define SETUP() \
  MacroAssembler masm(BUF_SIZE); \
  CPU::SetUp()

#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()

#define END() \
  __ PopCalleeSavedRegisters(); \
  __ Ret(); \
  masm.FinalizeCode()

// Make the generated buffer executable (I/D cache coherency) and call it as
// a plain function.
#define RUN() \
  { \
    byte* buffer_start = masm.GetStartAddress<byte*>(); \
    size_t buffer_length = masm.CursorOffset(); \
    void (*test_function)(void); \
    \
    CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length); \
    VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function)); \
    memcpy(&test_function, &buffer_start, sizeof(buffer_start)); \
    test_function(); \
  }

#define TEARDOWN()

#endif  // USE_SIMULATOR


// The maximum number of errors to report in detail for each test.
static const unsigned kErrorReportLimit = 8;


// Overloaded versions of rawbits_to_double and rawbits_to_float for use in the
// templated test functions.
// Return the floating-point value encoded by `bits`. The overloads let the
// templated test functions convert raw inputs without naming the FP type.
static float rawbits_to_fp(uint32_t bits) {
  return rawbits_to_float(bits);
}

static double rawbits_to_fp(uint64_t bits) {
  return rawbits_to_double(bits);
}


// MacroAssembler member function pointers to pass to the test dispatchers.
// Each typedef matches the signature of a family of assembler mnemonics, so
// one dispatcher routine can drive many different instructions.

// Unary FP operation (fd <- op(fn)).
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn);
// Binary FP operation (fd <- op(fn, fm)).
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn,
                                                  const FPRegister& fm);
// Ternary FP operation (fd <- op(fn, fm, fa)).
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn,
                                                  const FPRegister& fm,
                                                  const FPRegister& fa);
// FP comparison of two registers (sets NZCV).
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
                                                  const FPRegister& fm);
// FP comparison against a literal value (sets NZCV).
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
                                                      double value);
// FP-to-integer conversion (rd <- convert(fn)).
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const FPRegister& fn);
// FP-to-fixed-point conversion with `fbits` fractional bits.
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const FPRegister& fn,
                                                      int fbits);
// Fixed-point-to-FP conversion with `fbits` fractional bits.
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
// consolidated into one routine.
// Unary NEON operation.
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(
    const VRegister& vd, const VRegister& vn);
// Binary NEON operation.
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(
    const VRegister& vd, const VRegister& vn, const VRegister& vm);
// NEON by-element operation; `vm_index` selects the lane of vm.
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(
    const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index);
// NEON operation taking two immediates, where vd is updated in place.
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    const VRegister& vd, int imm1, const VRegister& vn, int imm2);

// This helps using the same typename for both the function pointer
// and the array of immediates passed to helper routines.
// Wrapper so the immediate type T names both the mnemonic's function-pointer
// type and the matching array of immediates passed to the helper routines.
template <typename T>
class Test2OpImmediateNEONHelper_t {
 public:
  typedef void (MacroAssembler::*mnemonic)(
      const VRegister& vd, const VRegister& vn, T imm);
};


// Standard test dispatchers.


// Generate and run code that applies the unary FP operation `helper` to every
// element of the `inputs` array, writing each raw result to `results`.
// `d_size`/`n_size` select D- or S-sized destination/source registers.
static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs,
                           unsigned inputs_length, uintptr_t results,
                           unsigned d_size, unsigned n_size) {
  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  // Register assignments for the generated loop.
  Register out = x0;          // Cursor into the results buffer.
  Register inputs_base = x1;  // Base address of the inputs array.
  Register length = w2;       // Number of inputs.
  Register index_n = w3;      // Current input index.

  // Scale the index by the byte size of each input element.
  const int n_index_shift =
      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // Emit the instruction under test; the scope checks that the helper emits
    // a single instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn);
  }
  // Store the raw result and advance the output cursor.
  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
// Dispatcher for unary FP instructions: run `helper` over every input, then
// either print the raw results as C source (--sim_test_trace) or check them
// bit-exactly against expected[]. Tn/Td are the unsigned rawbits types of the
// source and destination values.
template <typename Tn, typename Td>
static void Test1Op(const char * name, Test1OpFPHelper_t helper,
                    const Tn inputs[], unsigned inputs_length,
                    const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per input.
  const unsigned results_length = inputs_length;
  Td * results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results, formatted for test-simulator-traces-a64.h.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf("  0x%0*" PRIx64 ",\n",
             d_bits / 4, static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Describe at most kErrorReportLimit mismatches in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
               name, rawbits_to_fp(inputs[n]));
        printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
               d_bits / 4, static_cast<uint64_t>(expected[d]),
               rawbits_to_fp(expected[d]));
        printf("  Found:    0x%0*" PRIx64 " (%g)\n",
               d_bits / 4, static_cast<uint64_t>(results[d]),
               rawbits_to_fp(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies the binary FP operation `helper` to every
// (fn, fm) pair of inputs, writing each raw result to `results`. `reg_size`
// selects D- or S-sized registers for all operands.
static void Test2Op_Helper(Test2OpFPHelper_t helper,
                           uintptr_t inputs, unsigned inputs_length,
                           uintptr_t results, unsigned reg_size) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  // Register assignments for the generated loops.
  Register out = x0;          // Cursor into the results buffer.
  Register inputs_base = x1;  // Base address of the inputs array.
  Register length = w2;       // Number of inputs.
  Register index_n = w3;      // Outer (fn) input index.
  Register index_m = w4;      // Inner (fm) input index.

  bool double_op = reg_size == kDRegSize;
  // Scale the indices by the byte size of each input element.
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fd = double_op ? d0 : s0;
  FPRegister fn = double_op ? d1 : s1;
  FPRegister fm = double_op ? d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // Emit the instruction under test; the scope checks that the helper emits
    // a single instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm);
  }
  // Store the raw result and advance the output cursor.
  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
355template <typename T> 356static void Test2Op(const char * name, Test2OpFPHelper_t helper, 357 const T inputs[], unsigned inputs_length, 358 const T expected[], unsigned expected_length) { 359 VIXL_ASSERT(inputs_length > 0); 360 361 const unsigned results_length = inputs_length * inputs_length; 362 T * results = new T[results_length]; 363 364 const unsigned bits = sizeof(T) * 8; 365 366 Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, 367 reinterpret_cast<uintptr_t>(results), bits); 368 369 if (Test::sim_test_trace()) { 370 // Print the results. 371 printf("const uint%u_t kExpected_%s[] = {\n", bits, name); 372 for (unsigned d = 0; d < results_length; d++) { 373 printf(" 0x%0*" PRIx64 ",\n", 374 bits / 4, static_cast<uint64_t>(results[d])); 375 } 376 printf("};\n"); 377 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 378 } else { 379 // Check the results. 380 VIXL_CHECK(expected_length == results_length); 381 unsigned error_count = 0; 382 unsigned d = 0; 383 for (unsigned n = 0; n < inputs_length; n++) { 384 for (unsigned m = 0; m < inputs_length; m++, d++) { 385 if (results[d] != expected[d]) { 386 if (++error_count > kErrorReportLimit) continue; 387 388 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", 389 name, 390 bits / 4, static_cast<uint64_t>(inputs[n]), 391 bits / 4, static_cast<uint64_t>(inputs[m]), 392 name, 393 rawbits_to_fp(inputs[n]), 394 rawbits_to_fp(inputs[m])); 395 printf(" Expected: 0x%0*" PRIx64 " (%g)\n", 396 bits / 4, static_cast<uint64_t>(expected[d]), 397 rawbits_to_fp(expected[d])); 398 printf(" Found: 0x%0*" PRIx64 " (%g)\n", 399 bits / 4, static_cast<uint64_t>(results[d]), 400 rawbits_to_fp(results[d])); 401 printf("\n"); 402 } 403 } 404 } 405 VIXL_ASSERT(d == expected_length); 406 if (error_count > kErrorReportLimit) { 407 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 408 } 409 VIXL_CHECK(error_count == 0); 410 } 411 delete[] results; 412} 413 414 
// Generate and run code that applies the ternary FP operation `helper` to
// every (fn, fm, fa) triple of inputs, writing each raw result to `results`.
// `reg_size` selects D- or S-sized registers for all operands.
static void Test3Op_Helper(Test3OpFPHelper_t helper,
                           uintptr_t inputs, unsigned inputs_length,
                           uintptr_t results, unsigned reg_size) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m, loop_a;

  // Register assignments for the generated loops.
  Register out = x0;          // Cursor into the results buffer.
  Register inputs_base = x1;  // Base address of the inputs array.
  Register length = w2;       // Number of inputs.
  Register index_n = w3;      // Outer (fn) input index.
  Register index_m = w4;      // Middle (fm) input index.
  Register index_a = w5;      // Inner (fa) input index.

  bool double_op = reg_size == kDRegSize;
  // Scale the indices by the byte size of each input element.
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fd = double_op ? d0 : s0;
  FPRegister fn = double_op ? d1 : s1;
  FPRegister fm = double_op ? d2 : s2;
  FPRegister fa = double_op ? d3 : s3;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  __ Mov(index_a, 0);
  __ Bind(&loop_a);
  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));

  {
    // Emit the instruction under test; the scope checks that the helper emits
    // a single instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm, fa);
  }
  // Store the raw result and advance the output cursor.
  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));

  __ Add(index_a, index_a, 1);
  __ Cmp(index_a, inputs_length);
  __ B(lo, &loop_a);

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
// Dispatcher for ternary FP instructions: run `helper` over the full n*m*a
// cross product of inputs[], then either print the raw results as C source
// (--sim_test_trace) or check them bit-exactly against expected[]. T is the
// unsigned rawbits type of the operands.
template <typename T>
static void Test3Op(const char * name, Test3OpFPHelper_t helper,
                    const T inputs[], unsigned inputs_length,
                    const T expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per (n, m, a) triple.
  const unsigned results_length = inputs_length * inputs_length * inputs_length;
  T * results = new T[results_length];

  const unsigned bits = sizeof(T) * 8;

  Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                 reinterpret_cast<uintptr_t>(results), bits);

  if (Test::sim_test_trace()) {
    // Print the results, formatted for test-simulator-traces-a64.h.
    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf("  0x%0*" PRIx64 ",\n",
             bits / 4, static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned m = 0; m < inputs_length; m++) {
        for (unsigned a = 0; a < inputs_length; a++, d++) {
          if (results[d] != expected[d]) {
            // Describe at most kErrorReportLimit mismatches in detail.
            if (++error_count > kErrorReportLimit) continue;

            printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
                   " (%s %g %g %g):\n",
                   name,
                   bits / 4, static_cast<uint64_t>(inputs[n]),
                   bits / 4, static_cast<uint64_t>(inputs[m]),
                   bits / 4, static_cast<uint64_t>(inputs[a]),
                   name,
                   rawbits_to_fp(inputs[n]),
                   rawbits_to_fp(inputs[m]),
                   rawbits_to_fp(inputs[a]));
            printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
                   bits / 4, static_cast<uint64_t>(expected[d]),
                   rawbits_to_fp(expected[d]));
            printf("  Found:    0x%0*" PRIx64 " (%g)\n",
                   bits / 4, static_cast<uint64_t>(results[d]),
                   rawbits_to_fp(results[d]));
            printf("\n");
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies the FP comparison `helper` to every
// (fn, fm) pair of inputs, storing the resulting NZCV flags (one byte per
// comparison) to `results`. `reg_size` selects D- or S-sized registers.
static void TestCmp_Helper(TestFPCmpHelper_t helper,
                           uintptr_t inputs, unsigned inputs_length,
                           uintptr_t results, unsigned reg_size) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  // Register assignments for the generated loops.
  Register out = x0;          // Cursor into the results buffer.
  Register inputs_base = x1;  // Base address of the inputs array.
  Register length = w2;       // Number of inputs.
  Register index_n = w3;      // Outer (fn) input index.
  Register index_m = w4;      // Inner (fm) input index.
  Register flags = x5;        // Extracted NZCV flags.

  bool double_op = reg_size == kDRegSize;
  // Scale the indices by the byte size of each input element.
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fn = double_op ? d1 : s1;
  FPRegister fm = double_op ? d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // Emit the instruction under test; the scope checks that the helper emits
    // a single instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, fm);
  }
  // Read NZCV and extract the flag bits ([31:28]) into the low nibble, then
  // store one byte per comparison.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
612template <typename T> 613static void TestCmp(const char * name, TestFPCmpHelper_t helper, 614 const T inputs[], unsigned inputs_length, 615 const uint8_t expected[], unsigned expected_length) { 616 VIXL_ASSERT(inputs_length > 0); 617 618 const unsigned results_length = inputs_length * inputs_length; 619 uint8_t * results = new uint8_t[results_length]; 620 621 const unsigned bits = sizeof(T) * 8; 622 623 TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, 624 reinterpret_cast<uintptr_t>(results), bits); 625 626 if (Test::sim_test_trace()) { 627 // Print the results. 628 printf("const uint8_t kExpected_%s[] = {\n", name); 629 for (unsigned d = 0; d < results_length; d++) { 630 // Each NZCV result only requires 4 bits. 631 VIXL_ASSERT((results[d] & 0xf) == results[d]); 632 printf(" 0x%" PRIx8 ",\n", results[d]); 633 } 634 printf("};\n"); 635 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 636 } else { 637 // Check the results. 638 VIXL_CHECK(expected_length == results_length); 639 unsigned error_count = 0; 640 unsigned d = 0; 641 for (unsigned n = 0; n < inputs_length; n++) { 642 for (unsigned m = 0; m < inputs_length; m++, d++) { 643 if (results[d] != expected[d]) { 644 if (++error_count > kErrorReportLimit) continue; 645 646 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", 647 name, 648 bits / 4, static_cast<uint64_t>(inputs[n]), 649 bits / 4, static_cast<uint64_t>(inputs[m]), 650 name, 651 rawbits_to_fp(inputs[n]), 652 rawbits_to_fp(inputs[m])); 653 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n", 654 (expected[d] & 0x8) ? 'N' : 'n', 655 (expected[d] & 0x4) ? 'Z' : 'z', 656 (expected[d] & 0x2) ? 'C' : 'c', 657 (expected[d] & 0x1) ? 'V' : 'v', 658 expected[d]); 659 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n", 660 (results[d] & 0x8) ? 'N' : 'n', 661 (results[d] & 0x4) ? 'Z' : 'z', 662 (results[d] & 0x2) ? 'C' : 'c', 663 (results[d] & 0x1) ? 
'V' : 'v', 664 results[d]); 665 printf("\n"); 666 } 667 } 668 } 669 VIXL_ASSERT(d == expected_length); 670 if (error_count > kErrorReportLimit) { 671 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 672 } 673 VIXL_CHECK(error_count == 0); 674 } 675 delete[] results; 676} 677 678 679static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper, 680 uintptr_t inputs, unsigned inputs_length, 681 uintptr_t results, unsigned reg_size) { 682 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 683 684 SETUP(); 685 START(); 686 687 // Roll up the loop to keep the code size down. 688 Label loop_n, loop_m; 689 690 Register out = x0; 691 Register inputs_base = x1; 692 Register length = w2; 693 Register index_n = w3; 694 Register flags = x4; 695 696 bool double_op = reg_size == kDRegSize; 697 const int index_shift = 698 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 699 700 FPRegister fn = double_op ? d1 : s1; 701 702 __ Mov(out, results); 703 __ Mov(inputs_base, inputs); 704 __ Mov(length, inputs_length); 705 706 __ Mov(index_n, 0); 707 __ Bind(&loop_n); 708 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 709 710 { 711 SingleEmissionCheckScope guard(&masm); 712 (masm.*helper)(fn, 0.0); 713 } 714 __ Mrs(flags, NZCV); 715 __ Ubfx(flags, flags, 28, 4); 716 __ Strb(flags, MemOperand(out, 1, PostIndex)); 717 718 __ Add(index_n, index_n, 1); 719 __ Cmp(index_n, inputs_length); 720 __ B(lo, &loop_n); 721 722 END(); 723 RUN(); 724 TEARDOWN(); 725} 726 727 728// Test FP instructions. The inputs[] and expected[] arrays should be arrays of 729// rawbits representations of doubles or floats. This ensures that exact bit 730// comparisons can be performed. 
// Dispatcher for FP compare-with-zero instructions: run `helper` over every
// input, then either print the NZCV results as C source (--sim_test_trace) or
// check them against expected[]. Each expected/result byte holds NZCV in its
// low nibble (N = bit 3 ... V = bit 0).
template <typename T>
static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper,
                        const T inputs[], unsigned inputs_length,
                        const uint8_t expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One NZCV byte per input.
  const unsigned results_length = inputs_length;
  uint8_t * results = new uint8_t[results_length];

  const unsigned bits = sizeof(T) * 8;

  TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                     reinterpret_cast<uintptr_t>(results), bits);

  if (Test::sim_test_trace()) {
    // Print the results, formatted for test-simulator-traces-a64.h.
    printf("const uint8_t kExpected_%s[] = {\n", name);
    for (unsigned d = 0; d < results_length; d++) {
      // Each NZCV result only requires 4 bits.
      VIXL_ASSERT((results[d] & 0xf) == results[d]);
      printf("  0x%" PRIx8 ",\n", results[d]);
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Describe at most kErrorReportLimit mismatches in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
               name,
               bits / 4, static_cast<uint64_t>(inputs[n]),
               bits / 4, 0,
               name,
               rawbits_to_fp(inputs[n]));
        // Show set flags as upper-case letters, clear flags as lower-case.
        printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
               (expected[d] & 0x8) ? 'N' : 'n',
               (expected[d] & 0x4) ? 'Z' : 'z',
               (expected[d] & 0x2) ? 'C' : 'c',
               (expected[d] & 0x1) ? 'V' : 'v',
               expected[d]);
        printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
               (results[d] & 0x8) ? 'N' : 'n',
               (results[d] & 0x4) ? 'Z' : 'z',
               (results[d] & 0x2) ? 'C' : 'c',
               (results[d] & 0x1) ? 'V' : 'v',
               results[d]);
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run code that applies the FP-to-fixed-point conversion
// `helper` to every input, once for each fbits value in [0, d_size], writing
// each result to `results` ((d_size + 1) results per input). `d_size`/`n_size`
// select X- or W-sized destination and D- or S-sized source registers.
static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
                                 uintptr_t inputs, unsigned inputs_length,
                                 uintptr_t results,
                                 unsigned d_size, unsigned n_size) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  // Register assignments for the generated loop.
  Register out = x0;          // Cursor into the results buffer.
  Register inputs_base = x1;  // Base address of the inputs array.
  Register length = w2;       // Number of inputs.
  Register index_n = w3;      // Current input index.

  // Scale the index by the byte size of each input element.
  const int n_index_shift =
      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  Register rd = (d_size == kXRegSize) ? x10 : w10;
  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  // The fbits loop is unrolled at generation time: one conversion per fbits
  // value, from 0 up to and including the destination register size.
  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    {
      // Emit the instruction under test; the scope checks that the helper
      // emits a single instruction.
      SingleEmissionCheckScope guard(&masm);
      (masm.*helper)(rd, fn, fbits);
    }
    __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Generate and run code that applies the FP-to-integer conversion `helper` to
// every input, writing each result to `results`. `d_size`/`n_size` select X-
// or W-sized destination and D- or S-sized source registers.
static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs,
                               unsigned inputs_length, uintptr_t results,
                               unsigned d_size, unsigned n_size) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  // Register assignments for the generated loop.
  Register out = x0;          // Cursor into the results buffer.
  Register inputs_base = x1;  // Base address of the inputs array.
  Register length = w2;       // Number of inputs.
  Register index_n = w3;      // Current input index.

  // Scale the index by the byte size of each input element.
  const int n_index_shift =
      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  Register rd = (d_size == kXRegSize) ? x10 : w10;
  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // Emit the instruction under test; the scope checks that the helper emits
    // a single instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(rd, fn);
  }
  // Store the raw result and advance the output cursor.
  __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions.
//  - The inputs[] array should be an array of rawbits representations of
//    doubles or floats.
//    This ensures that exact bit comparisons can be performed.
//  - The expected[] array should be an array of signed integers.
template <typename Tn, typename Td>
static void TestFPToS(const char * name, TestFPToIntHelper_t helper,
                      const Tn inputs[], unsigned inputs_length,
                      const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per input.
  const unsigned results_length = inputs_length;
  Td * results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results, formatted for test-simulator-traces-a64.h.
    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    // There is no simple C++ literal for INT*_MIN that doesn't produce
    // warnings, so we use an appropriate constant in that case instead.
    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    // the like) avoids warnings about comparing values with differing ranges.
    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    const int64_t int_d_min = -(int_d_max) - 1;
    for (unsigned d = 0; d < results_length; d++) {
      if (results[d] == int_d_min) {
        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
      } else {
        // Some constants (such as those between INT32_MAX and UINT32_MAX)
        // trigger compiler warnings. To avoid these warnings, use an
        // appropriate macro to make the type explicit.
        int64_t result_int64 = static_cast<int64_t>(results[d]);
        if (result_int64 >= 0) {
          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
        } else {
          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
        }
      }
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Describe at most kErrorReportLimit mismatches in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
               name, rawbits_to_fp(inputs[n]));
        printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4, static_cast<uint64_t>(expected[d]),
               static_cast<int64_t>(expected[d]));
        printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4, static_cast<uint64_t>(results[d]),
               static_cast<int64_t>(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
//  - The inputs[] array should be an array of rawbits representations of
//    doubles or floats. This ensures that exact bit comparisons can be
//    performed.
//  - The expected[] array should be an array of unsigned integers.
template <typename Tn, typename Td>
static void TestFPToU(const char * name, TestFPToIntHelper_t helper,
                      const Tn inputs[], unsigned inputs_length,
                      const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per input.
  const unsigned results_length = inputs_length;
  Td * results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  TestFPToInt_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs), inputs_length,
                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results, formatted for test-simulator-traces-a64.h.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Describe at most kErrorReportLimit mismatches in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
               name, rawbits_to_fp(inputs[n]));
        printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
               d_bits / 4, static_cast<uint64_t>(expected[d]),
               static_cast<uint64_t>(expected[d]));
        printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
               d_bits / 4, static_cast<uint64_t>(results[d]),
               static_cast<uint64_t>(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
//  - The inputs[] array should be an array of rawbits representations of
//    doubles or floats. This ensures that exact bit comparisons can be
//    performed.
//  - The expected[] array should be an array of signed integers.
template <typename Tn, typename Td>
static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper,
                           const Tn inputs[], unsigned inputs_length,
                           const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  // The helper converts each input with every fbits value from 0 to d_bits
  // inclusive, so there are (d_bits + 1) results per input.
  const unsigned results_length = inputs_length * (d_bits + 1);
  Td * results = new Td[results_length];

  TestFPToFixed_Helper(helper,
                       reinterpret_cast<uintptr_t>(inputs), inputs_length,
                       reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    // There is no simple C++ literal for INT*_MIN that doesn't produce
    // warnings, so we use an appropriate constant in that case instead.
    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    // the like) avoids warnings about comparing values with differing ranges.
    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    const int64_t int_d_min = -(int_d_max) - 1;
    for (unsigned d = 0; d < results_length; d++) {
      if (results[d] == int_d_min) {
        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
      } else {
        // Some constants (such as those between INT32_MAX and UINT32_MAX)
        // trigger compiler warnings. To avoid these warnings, use an
        // appropriate macro to make the type explicit.
        int64_t result_int64 = static_cast<int64_t>(results[d]);
        if (result_int64 >= 0) {
          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
        } else {
          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
        }
      }
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results. The result index 'd' advances once per (input,
    // fbits) pair, matching the order in which the helper stored them.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
        if (results[d] != expected[d]) {
          if (++error_count > kErrorReportLimit) continue;

          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
                 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
                 name, rawbits_to_fp(inputs[n]), fbits);
          printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
                 d_bits / 4, static_cast<uint64_t>(expected[d]),
                 static_cast<int64_t>(expected[d]));
          printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
                 d_bits / 4, static_cast<uint64_t>(results[d]),
                 static_cast<int64_t>(results[d]));
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
//  - The inputs[] array should be an array of rawbits representations of
//    doubles or floats. This ensures that exact bit comparisons can be
//    performed.
//  - The expected[] array should be an array of unsigned integers.
template <typename Tn, typename Td>
static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper,
                           const Tn inputs[], unsigned inputs_length,
                           const Td expected[], unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  // The helper converts each input with every fbits value from 0 to d_bits
  // inclusive, so there are (d_bits + 1) results per input.
  const unsigned results_length = inputs_length * (d_bits + 1);
  Td * results = new Td[results_length];

  TestFPToFixed_Helper(helper,
                       reinterpret_cast<uintptr_t>(inputs), inputs_length,
                       reinterpret_cast<uintptr_t>(results), d_bits, n_bits);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results. The result index 'd' advances once per (input,
    // fbits) pair, matching the order in which the helper stored them.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
        if (results[d] != expected[d]) {
          if (++error_count > kErrorReportLimit) continue;

          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
                 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
                 name, rawbits_to_fp(inputs[n]), fbits);
          printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
                 d_bits / 4, static_cast<uint64_t>(expected[d]),
                 static_cast<uint64_t>(expected[d]));
          printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
                 d_bits / 4, static_cast<uint64_t>(results[d]),
                 static_cast<uint64_t>(results[d]));
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg. ====


// Assemble and run (on the simulator) a loop that applies 'helper' once per
// input lane, storing the destination register to 'results' after each
// iteration.
static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
                               uintptr_t inputs_n, unsigned inputs_n_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  // Pre-load the last 16 bytes of the input buffer; each loop iteration
  // rotates one fresh lane into vn via Ext below.
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next input lane and shift it into vn.
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd_helper, vn_helper);
  }
  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper,
                        const Tn inputs_n[], unsigned inputs_n_length,
                        const Td expected[], unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One result vector (vd_lane_count lanes) per input iteration.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;

  Test1OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // Report any mismatching vector as a whole, lane by lane.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex+1, padding,
               lane_len_in_hex+1, padding);

        // Map iteration 'n' back to the rotating input window set up by
        // Test1OpNEON_Helper (which pre-loads the last 16 bytes).
        const unsigned first_index_n =
            inputs_n_length - (16 / vn_lane_bytes) + n + 1;

        for (unsigned lane = 0;
             lane < std::max(vd_lane_count, vn_lane_count);
             lane++) {
          unsigned output_index = (n * vd_lane_count) + lane;
          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                 "| 0x%0*" PRIx64 "\n",
                 results[output_index] != expected[output_index] ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[output_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expected[output_index]));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
//      where <V> is one of B, H, S or D registers.
//      e.g. saddlv H1, v0.8B

// TODO: Change tests to store all lanes of the resulting V register.
//       Some tests store all 128 bits of the resulting V register to
//       check the simulator's behaviour on the rest of the register.
//       This is better than storing the affected lanes only.
//       Change any tests such as the 'Across' template to do the same.

// Assemble and run (on the simulator) a loop that applies the across-lanes
// 'helper' once per input lane, storing the (scalar) destination register to
// 'results' after each iteration.
static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_vector = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(3, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  // Pre-load the last full input vector; each iteration rotates one fresh
  // lane into vn via Ext below.
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  // Set the destination to zero for tests such as '[r]shrn2'.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vn_helper);
  }
  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}

// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper,
                              const Tn inputs_n[], unsigned inputs_n_length,
                              const Td expected[], unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;

  Test1OpAcrossNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex+1, padding,
               lane_len_in_hex+1, padding);

        // TODO: In case of an error, all tests print out as many elements as
        //       there are lanes in the output or input vectors. This way
        //       the viewer can read all the values that were needed for the
        //       operation but the output contains also unnecessary values.
        //       These prints can be improved according to the arguments
        //       passed to test functions.
        //       This output for the 'Across' category has the required
        //       modifications.
        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
          // Across-lanes results are scalar, so only lane 0 of the output is
          // meaningful.
          unsigned output_index = n * vd_lane_count;
          unsigned input_index_n = (inputs_n_length - vn_lane_count +
                                    n + 1 + lane) % inputs_n_length;

          if (vn_lane_count-1 == lane) {  // Is this the last lane?
            // Print the result element(s) in the last lane only.
            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                   "| 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          } else {
            printf(" 0x%0*" PRIx64 " | %.*s| %.*s\n",
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding);
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====

// TODO: Iterate over inputs_d once the traces file is split.

// Assemble and run (on the simulator) a nested loop over the n and m inputs,
// applying 'helper' once per (n, m) pair and storing the 128-bit result
// register to 'results' after each iteration. The destination is seeded from
// 'inputs_d' before every call (for accumulating instructions).
static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
                               uintptr_t inputs_d,
                               uintptr_t inputs_n, unsigned inputs_n_length,
                               uintptr_t inputs_m, unsigned inputs_m_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               VectorFormat vm_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Rotate the next n-input lane into vn.
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Rotate the next m-input lane into vm.
  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
                                  vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Seed the result register from the fixed d-input before each call.
  __ Mov(vres, vd);
  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vres_helper, vn_helper, vm_helper);
  }
  __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn, typename Tm>
static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper,
                        const Td inputs_d[],
                        const Tn inputs_n[], unsigned inputs_n_length,
                        const Tm inputs_m[], unsigned inputs_m_length,
                        const Td expected[], unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form,
                        VectorFormat vm_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One result vector per (n, m) input pair.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tm)) * 8) / 4;

  Test2OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_d),
                     reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
                     reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form, vn_form, vm_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex+1, padding);

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            // Recover the rotated input-window indices used by the helper.
            unsigned input_index_n = (inputs_n_length - vd_lane_count +
                                      n + 1 + lane) % inputs_n_length;
            unsigned input_index_m = (inputs_m_length - vd_lane_count +
                                      m + 1 + lane) % inputs_m_length;

            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_d[lane]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_m[input_index_m]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====

// Assemble and run (on the simulator) a nested loop over the n and m inputs;
// for each (n, m) pair, 'helper' is applied once per entry in 'indices', and
// the 128-bit result register is stored to 'results' after every call.
static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
                                     uintptr_t inputs_d,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t inputs_m,
                                     unsigned inputs_m_length,
                                     const int indices[],
                                     unsigned indices_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     VectorFormat vm_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Rotate the next n-input lane into vn.
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Rotate the next m-input lane into vm.
  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
                                  vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Seed the result register from the fixed d-input, then run the helper
  // once per requested element index, storing after each call.
  __ Mov(vres, vd);
  {
    for (unsigned i = 0; i < indices_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
      }
      __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
    }
  }

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}



// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Test a NEON by-element instruction (e.g. 'mul Vd.4S, Vn.4S, Vm.S[i]').
// Runs 'helper' over every (n, m, index) combination via
// TestByElementNEON_Helper, then either prints the raw results as a trace
// table (--sim_test_trace) or compares them against 'expected'.
// The inputs_*[] and expected[] arrays hold rawbit representations so exact
// bit comparisons can be performed.
template <typename Td, typename Tn, typename Tm>
static void TestByElementNEON(const char *name,
                              TestByElementNEONHelper_t helper,
                              const Td inputs_d[],
                              const Tn inputs_n[], unsigned inputs_n_length,
                              const Tm inputs_m[], unsigned inputs_m_length,
                              const int indices[], unsigned indices_length,
                              const Td expected[], unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form,
                              VectorFormat vm_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_m_length > 0);
  VIXL_ASSERT(indices_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One result vector per (n, m, index) combination.
  const unsigned results_length = inputs_n_length * inputs_m_length *
      indices_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  // Hex digits needed to print the widest of the destination/source lanes.
  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tm)) * 8) / 4;

  TestByElementNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_d),
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(inputs_m),
                           inputs_m_length,
                           indices, indices_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form, vn_form, vm_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Padding source for the %.*s column headers; must be at least as wide as
    // one printed lane plus a space (checked by the assert below).
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++) {
        for (unsigned index = 0; index < indices_length; index++, d++) {
          bool error_in_vector = false;

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_m_length * indices_length * vd_lane_count) +
                (m * indices_length * vd_lane_count) +
                (index * vd_lane_count) + lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Report at most kErrorReportLimit failing vectors in full.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex+1, padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_m_length * indices_length * vd_lane_count) +
                  (m * indices_length * vd_lane_count) +
                  (index * vd_lane_count) + lane;
              // Undo the helper's sliding-window (EXT-based) input rotation to
              // recover which input lane fed this output lane.
              unsigned input_index_n = (inputs_n_length - vd_lane_count +
                                        n + 1 + lane) % inputs_n_length;
              unsigned input_index_m = (inputs_m_length - vd_lane_count +
                                        m + 1 + lane) % inputs_m_length;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                     "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] != expected[output_index] ?
                     '*' : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_m[input_index_m]),
                     indices[index],
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====


// Emits and runs the test code for a two-operand-plus-immediate NEON
// instruction. For each input lane n, a new lane is shifted into vn with EXT
// so vn holds a sliding window over inputs_n; 'helper' is then applied once
// per immediate in inputs_m[], and each result vector is stored to 'results'.
template <typename Tm>
void Test2OpImmNEON_Helper(
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    uintptr_t inputs_n,
    unsigned inputs_n_length,
    const Tm inputs_m[],
    unsigned inputs_m_length,
    uintptr_t results,
    VectorFormat vd_form,
    VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined &&
              vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  // Pre-load vn with the last 16 bytes of input so the first EXT below
  // produces a fully-initialised window.
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift a new input lane into vn (sliding window over inputs_n).
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero for tests such as '[r]shrn2'.
  // TODO: Setting the destination to values other than zero might be a better
  // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
  __ Movi(vd.V16B(), 0);

  {
    for (unsigned i = 0; i < inputs_m_length; i++) {
      {
        // Each helper invocation must emit exactly one instruction.
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
      }
      __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn, typename Tm>
static void Test2OpImmNEON(
    const char * name,
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    const Tn inputs_n[], unsigned inputs_n_length,
    const Tm inputs_m[], unsigned inputs_m_length,
    const Td expected[], unsigned expected_length,
    VectorFormat vd_form,
    VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One result vector per (input lane, immediate) combination.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;

  Test2OpImmNEON_Helper(helper,
                        reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
                        inputs_m, inputs_m_length,
                        reinterpret_cast<uintptr_t>(results),
                        vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // Padding source for the %.*s column headers (see assert below).
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        // Report at most kErrorReportLimit failing vectors in full.
        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex+1, padding,
                 lane_len_in_hex, padding,
                 lane_len_in_hex+1, padding);

          // First input lane present in the helper's 16-byte sliding window
          // at iteration n (the window is pre-loaded with the last 16 bytes).
          const unsigned first_index_n =
              inputs_n_length - (16 / vn_lane_bytes) + n + 1;

          // NOTE(review): when vn_lane_count > vd_lane_count (narrowing
          // forms), 'output_index' below can pass the end of the result
          // vector for the extra source lanes — confirm intended.
          for (unsigned lane = 0;
               lane < std::max(vd_lane_count, vn_lane_count);
               lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
            unsigned input_index_m = m;

            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_m[input_index_m]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====


// Emits and runs the test code for an instruction taking two immediates
// (e.g. 'ins Vd.S[i], Vn.S[j]'). vd is loaded once from inputs_d; for each
// input lane n a new lane is shifted into vn with EXT, then 'helper' is
// applied for every (imm1, imm2) pair, copying vd into vres first so the
// destination-updating instruction starts from the same value each time.
static void TestOpImmOpImmNEON_Helper(
    TestOpImmOpImmVdUpdateNEONHelper_t helper,
    uintptr_t inputs_d,
    const int inputs_imm1[], unsigned inputs_imm1_length,
    uintptr_t inputs_n, unsigned inputs_n_length,
    const int inputs_imm2[], unsigned inputs_imm2_length,
    uintptr_t results,
    VectorFormat vd_form, VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_d_base = x1;
  Register inputs_n_base = x2;
  Register inputs_n_last_vector = x4;
  Register index_n = x6;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(4, vn_bits);
  VRegister vres = VRegister(5, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(4, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  // Pre-load vn with the last full vector of input so the first EXT below
  // produces a fully-initialised window.
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vd, MemOperand(inputs_d_base));

  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift a new input lane into vn (sliding window over inputs_n).
  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
                                  vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  {
    // Three instructions (Mov, helper, Str) per (imm1, imm2) pair.
    EmissionCheckScope guard(&masm,
        kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3);
    for (unsigned i = 0; i < inputs_imm1_length; i++) {
      for (unsigned j = 0; j < inputs_imm2_length; j++) {
        __ Mov(vres, vd);
        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
        __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
      }
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Test a NEON instruction of the form <INST> VReg, #Imm, VReg, #Imm.
// Runs 'helper' over every (n, imm1, imm2) combination via
// TestOpImmOpImmNEON_Helper, then either prints the raw results as a trace
// table (--sim_test_trace) or compares them against 'expected'.
template <typename Td, typename Tn>
static void TestOpImmOpImmNEON(const char * name,
                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
                               const Td inputs_d[],
                               const int inputs_imm1[],
                               unsigned inputs_imm1_length,
                               const Tn inputs_n[],
                               unsigned inputs_n_length,
                               const int inputs_imm2[],
                               unsigned inputs_imm2_length,
                               const Td expected[],
                               unsigned expected_length,
                               VectorFormat vd_form,
                               VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_imm1_length > 0);
  VIXL_ASSERT(inputs_imm2_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  // One result vector per (input lane, imm1, imm2) combination.
  const unsigned results_length = inputs_n_length *
      inputs_imm1_length * inputs_imm2_length;

  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;

  TestOpImmOpImmNEON_Helper(helper,
                            reinterpret_cast<uintptr_t>(inputs_d),
                            inputs_imm1,
                            inputs_imm1_length,
                            reinterpret_cast<uintptr_t>(inputs_n),
                            inputs_n_length,
                            inputs_imm2,
                            inputs_imm2_length,
                            reinterpret_cast<uintptr_t>(results),
                            vd_form, vn_form);

  if (Test::sim_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned counted_length = 0;
    // Padding source for the %.*s column headers (see assert below).
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
          bool error_in_vector = false;

          counted_length++;

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_imm1_length *
                 inputs_imm2_length * vd_lane_count) +
                (imm1 * inputs_imm2_length * vd_lane_count) +
                (imm2 * vd_lane_count) + lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Report at most kErrorReportLimit failing vectors in full.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex, padding,
                   lane_len_in_hex+1, padding,
                   lane_len_in_hex, padding,
                   lane_len_in_hex+1, padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_imm1_length *
                   inputs_imm2_length * vd_lane_count) +
                  (imm1 * inputs_imm2_length * vd_lane_count) +
                  (imm2 * vd_lane_count) + lane;
              // Undo the helper's sliding-window (EXT-based) input rotation to
              // recover which input lane fed this output lane.
              unsigned input_index_n = (inputs_n_length - vd_lane_count +
                                        n + 1 + lane) % inputs_n_length;
              unsigned input_index_imm1 = imm1;
              unsigned input_index_imm2 = imm2;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
                     "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] !=
                     expected[output_index] ? '*' : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(counted_length == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Floating-point tests. ====


// Standard floating-point test expansion for both double- and single-precision
// operations.
#define STRINGIFY(s) #s

// Calls the Test<type> driver for one mnemonic/variant pair, pairing the
// input table with the matching kExpected_* / kExpectedCount_* trace arrays.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input)     \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),        \
             &MacroAssembler::mnemonic,                         \
             input, sizeof(input) / sizeof(input[0]),           \
             kExpected_##mnemonic##_##variant,                  \
             kExpectedCount_##mnemonic##_##variant)

// Defines both the double-precision (_d) and single-precision (_s) test
// cases for one FP mnemonic.
#define DEFINE_TEST_FP(mnemonic, type, input)                           \
  TEST(mnemonic##_d) {                                                  \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);        \
  }                                                                     \
  TEST(mnemonic##_s) {                                                  \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);         \
  }

// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
// memory.
// Fused multiply-add/subtract: three-operand FP instructions.
DEFINE_TEST_FP(fmadd, 3Op, Basic)
DEFINE_TEST_FP(fmsub, 3Op, Basic)
DEFINE_TEST_FP(fnmadd, 3Op, Basic)
DEFINE_TEST_FP(fnmsub, 3Op, Basic)

// Two-operand FP arithmetic instructions.
DEFINE_TEST_FP(fadd, 2Op, Basic)
DEFINE_TEST_FP(fdiv, 2Op, Basic)
DEFINE_TEST_FP(fmax, 2Op, Basic)
DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP(fmin, 2Op, Basic)
DEFINE_TEST_FP(fminnm, 2Op, Basic)
DEFINE_TEST_FP(fmul, 2Op, Basic)
DEFINE_TEST_FP(fsub, 2Op, Basic)
DEFINE_TEST_FP(fnmul, 2Op, Basic)

// Single-operand FP instructions, including the round-to-integral family.
DEFINE_TEST_FP(fabs, 1Op, Basic)
DEFINE_TEST_FP(fmov, 1Op, Basic)
DEFINE_TEST_FP(fneg, 1Op, Basic)
DEFINE_TEST_FP(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frinta, 1Op, Conversions)
DEFINE_TEST_FP(frinti, 1Op, Conversions)
DEFINE_TEST_FP(frintm, 1Op, Conversions)
DEFINE_TEST_FP(frintn, 1Op, Conversions)
DEFINE_TEST_FP(frintp, 1Op, Conversions)
DEFINE_TEST_FP(frintx, 1Op, Conversions)
DEFINE_TEST_FP(frintz, 1Op, Conversions)

// FP compare, against a register (Cmp) and against zero (CmpZero).
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// FP precision conversion: double->single (sd) and single->double (ds).
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }

// Defines FP-to-integer conversion tests for all four register-size
// combinations: X<-D (xd), X<-S (xs), W<-D (wd) and W<-S (ws).
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)                    \
  TEST(mnemonic##_xd) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);       \
  }                                                                     \
  TEST(mnemonic##_xs) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);        \
  }                                                                     \
  TEST(mnemonic##_wd) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);       \
  }                                                                     \
  TEST(mnemonic##_ws) {                                                 \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);        \
  }

DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
2542DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions) 2543DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions) 2544DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions) 2545DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions) 2546DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions) 2547DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions) 2548DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions) 2549 2550// TODO: Scvtf-fixed-point 2551// TODO: Scvtf-integer 2552// TODO: Ucvtf-fixed-point 2553// TODO: Ucvtf-integer 2554 2555// TODO: Fccmp 2556// TODO: Fcsel 2557 2558 2559// ==== NEON Tests. ==== 2560 2561#define CALL_TEST_NEON_HELPER_1Op(mnemonic, \ 2562 vdform, vnform, \ 2563 input_n) \ 2564 Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ 2565 &MacroAssembler::mnemonic, \ 2566 input_n, \ 2567 (sizeof(input_n) / sizeof(input_n[0])), \ 2568 kExpected_NEON_##mnemonic##_##vdform, \ 2569 kExpectedCount_NEON_##mnemonic##_##vdform, \ 2570 kFormat##vdform, \ 2571 kFormat##vnform) 2572 2573#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, \ 2574 vdform, vnform, \ 2575 input_n) \ 2576 Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) \ 2577 "_" STRINGIFY(vnform), \ 2578 &MacroAssembler::mnemonic, \ 2579 input_n, \ 2580 (sizeof(input_n) / sizeof(input_n[0])), \ 2581 kExpected_NEON_##mnemonic##_##vdform##_##vnform, \ 2582 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \ 2583 kFormat##vdform, \ 2584 kFormat##vnform) 2585 2586#define CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 2587 vdform, vnform, vmform, \ 2588 input_d, input_n, input_m) \ 2589 Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ 2590 &MacroAssembler::mnemonic, \ 2591 input_d, \ 2592 input_n, \ 2593 (sizeof(input_n) / sizeof(input_n[0])), \ 2594 input_m, \ 2595 (sizeof(input_m) / sizeof(input_m[0])), \ 2596 kExpected_NEON_##mnemonic##_##vdform, \ 2597 kExpectedCount_NEON_##mnemonic##_##vdform, \ 2598 kFormat##vdform, \ 2599 kFormat##vnform, \ 2600 kFormat##vmform) 2601 2602#define 
CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ 2603 vdform, vnform, \ 2604 input_n, input_m) \ 2605 Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \ 2606 &MacroAssembler::mnemonic, \ 2607 input_n, \ 2608 (sizeof(input_n) / sizeof(input_n[0])), \ 2609 input_m, \ 2610 (sizeof(input_m) / sizeof(input_m[0])), \ 2611 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \ 2612 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \ 2613 kFormat##vdform, \ 2614 kFormat##vnform) 2615 2616#define CALL_TEST_NEON_HELPER_ByElement(mnemonic, \ 2617 vdform, vnform, vmform, \ 2618 input_d, input_n, input_m, indices) \ 2619 TestByElementNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) \ 2620 "_" STRINGIFY(vnform) "_" STRINGIFY(vmform), \ 2621 &MacroAssembler::mnemonic, \ 2622 input_d, \ 2623 input_n, \ 2624 (sizeof(input_n) / sizeof(input_n[0])), \ 2625 input_m, \ 2626 (sizeof(input_m) / sizeof(input_m[0])), \ 2627 indices, \ 2628 (sizeof(indices) / sizeof(indices[0])), \ 2629 kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ 2630 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ 2631 kFormat##vdform, \ 2632 kFormat##vnform, \ 2633 kFormat##vmform) 2634 2635#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, \ 2636 mnemonic, \ 2637 vdform, vnform, \ 2638 input_d, input_imm1, \ 2639 input_n, input_imm2) \ 2640 TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ 2641 helper, \ 2642 input_d, \ 2643 input_imm1, \ 2644 (sizeof(input_imm1) / sizeof(input_imm1[0])), \ 2645 input_n, \ 2646 (sizeof(input_n) / sizeof(input_n[0])), \ 2647 input_imm2, \ 2648 (sizeof(input_imm2) / sizeof(input_imm2[0])), \ 2649 kExpected_NEON_##mnemonic##_##vdform, \ 2650 kExpectedCount_NEON_##mnemonic##_##vdform, \ 2651 kFormat##vdform, \ 2652 kFormat##vnform) 2653 2654#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \ 2655 CALL_TEST_NEON_HELPER_1Op(mnemonic, \ 2656 variant, variant, \ 2657 input) 2658 2659#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, 
input) \ 2660 TEST(mnemonic##_8B) { \ 2661 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \ 2662 } \ 2663 TEST(mnemonic##_16B) { \ 2664 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \ 2665 } 2666 2667#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \ 2668 TEST(mnemonic##_4H) { \ 2669 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \ 2670 } \ 2671 TEST(mnemonic##_8H) { \ 2672 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \ 2673 } 2674 2675#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ 2676 TEST(mnemonic##_2S) { \ 2677 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \ 2678 } \ 2679 TEST(mnemonic##_4S) { \ 2680 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \ 2681 } 2682 2683#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ 2684 DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ 2685 DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) 2686 2687#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ 2688 DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ 2689 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) 2690 2691#define DEFINE_TEST_NEON_2SAME(mnemonic, input) \ 2692 DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ 2693 TEST(mnemonic##_2D) { \ 2694 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ 2695 } 2696#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \ 2697 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ 2698 TEST(mnemonic##_2D) { \ 2699 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ 2700 } 2701 2702#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \ 2703 TEST(mnemonic##_2S) { \ 2704 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \ 2705 } \ 2706 TEST(mnemonic##_4S) { \ 2707 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \ 2708 } \ 2709 TEST(mnemonic##_2D) { \ 2710 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \ 2711 } 2712 2713#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, 
input) \ 2714 TEST(mnemonic##_S) { \ 2715 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \ 2716 } \ 2717 TEST(mnemonic##_D) { \ 2718 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \ 2719 } 2720 2721#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ 2722 TEST(mnemonic##_B) { \ 2723 CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \ 2724 } 2725#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ 2726 TEST(mnemonic##_H) { \ 2727 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \ 2728 } 2729#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 2730 TEST(mnemonic##_S) { \ 2731 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \ 2732 } 2733#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \ 2734 TEST(mnemonic##_D) { \ 2735 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \ 2736 } 2737 2738#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \ 2739 DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ 2740 DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ 2741 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 2742 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) 2743 2744#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \ 2745 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 2746 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) 2747 2748 2749#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \ 2750 CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, \ 2751 vd_form, vn_form, \ 2752 input_n) 2753 2754#define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \ 2755 TEST(mnemonic##_B_8B) { \ 2756 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \ 2757 } \ 2758 TEST(mnemonic##_B_16B) { \ 2759 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \ 2760 } \ 2761 TEST(mnemonic##_H_4H) { \ 2762 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \ 2763 } \ 2764 TEST(mnemonic##_H_8H) { \ 2765 
CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \ 2766 } \ 2767 TEST(mnemonic##_S_4S) { \ 2768 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \ 2769 } 2770 2771#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \ 2772 TEST(mnemonic##_H_8B) { \ 2773 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \ 2774 } \ 2775 TEST(mnemonic##_H_16B) { \ 2776 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \ 2777 } \ 2778 TEST(mnemonic##_S_4H) { \ 2779 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \ 2780 } \ 2781 TEST(mnemonic##_S_8H) { \ 2782 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \ 2783 } \ 2784 TEST(mnemonic##_D_4S) { \ 2785 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \ 2786 } 2787 2788#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \ 2789 TEST(mnemonic##_S_4S) { \ 2790 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \ 2791 } 2792 2793#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, \ 2794 vdform, vnform, \ 2795 input_n) \ 2796 CALL_TEST_NEON_HELPER_1Op(mnemonic, \ 2797 vdform, vnform, \ 2798 input_n) 2799 2800#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \ 2801 TEST(mnemonic##_4H) { \ 2802 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \ 2803 } \ 2804 TEST(mnemonic##_8H) { \ 2805 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \ 2806 } \ 2807 TEST(mnemonic##_2S) { \ 2808 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \ 2809 } \ 2810 TEST(mnemonic##_4S) { \ 2811 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \ 2812 } \ 2813 TEST(mnemonic##_1D) { \ 2814 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \ 2815 } \ 2816 TEST(mnemonic##_2D) { \ 2817 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \ 2818 } 2819 2820#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \ 2821 
TEST(mnemonic##_8B) { \ 2822 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \ 2823 } \ 2824 TEST(mnemonic##_4H) { \ 2825 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \ 2826 } \ 2827 TEST(mnemonic##_2S) { \ 2828 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \ 2829 } \ 2830 TEST(mnemonic##2_16B) { \ 2831 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input);\ 2832 } \ 2833 TEST(mnemonic##2_8H) { \ 2834 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \ 2835 } \ 2836 TEST(mnemonic##2_4S) { \ 2837 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \ 2838 } 2839 2840#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ 2841 TEST(mnemonic##_4S) { \ 2842 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ 2843 } \ 2844 TEST(mnemonic##_2D) { \ 2845 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ 2846 } \ 2847 TEST(mnemonic##2_4S) { \ 2848 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input);\ 2849 } \ 2850 TEST(mnemonic##2_2D) { \ 2851 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ 2852 } 2853 2854#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ 2855 TEST(mnemonic##_4H) { \ 2856 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ 2857 } \ 2858 TEST(mnemonic##_2S) { \ 2859 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 2860 } \ 2861 TEST(mnemonic##2_8H) { \ 2862 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ 2863 } \ 2864 TEST(mnemonic##2_4S) { \ 2865 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ 2866 } 2867 2868#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ 2869 TEST(mnemonic##_2S) { \ 2870 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 2871 } \ 2872 TEST(mnemonic##2_4S) { \ 2873 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 
2D, kInputDouble##input); \ 2874 } 2875 2876#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ 2877 TEST(mnemonic##_B) { \ 2878 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ 2879 } \ 2880 TEST(mnemonic##_H) { \ 2881 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ 2882 } \ 2883 TEST(mnemonic##_S) { \ 2884 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ 2885 } 2886 2887#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ 2888 TEST(mnemonic##_S) { \ 2889 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ 2890 } \ 2891 TEST(mnemonic##_D) { \ 2892 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ 2893 } 2894 2895#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) { \ 2896 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 2897 variant, variant, variant, \ 2898 input_d, input_nm, input_nm); \ 2899 } 2900 2901#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 2902 TEST(mnemonic##_8B) { \ 2903 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, \ 2904 kInput8bitsAccDestination, \ 2905 kInput8bits##input); \ 2906 } \ 2907 TEST(mnemonic##_16B) { \ 2908 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, \ 2909 kInput8bitsAccDestination, \ 2910 kInput8bits##input); \ 2911 } \ 2912 2913#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ 2914 TEST(mnemonic##_4H) { \ 2915 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, \ 2916 kInput16bitsAccDestination, \ 2917 kInput16bits##input); \ 2918 } \ 2919 TEST(mnemonic##_8H) { \ 2920 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, \ 2921 kInput16bitsAccDestination, \ 2922 kInput16bits##input); \ 2923 } \ 2924 TEST(mnemonic##_2S) { \ 2925 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \ 2926 kInput32bitsAccDestination, \ 2927 kInput32bits##input); \ 2928 } \ 2929 TEST(mnemonic##_4S) { \ 2930 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \ 2931 kInput32bitsAccDestination, \ 2932 kInput32bits##input); \ 2933 } 2934 2935#define 
DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 2936 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 2937 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) 2938 2939#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ 2940 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 2941 TEST(mnemonic##_2D) { \ 2942 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \ 2943 kInput64bitsAccDestination, \ 2944 kInput64bits##input); \ 2945 } 2946 2947#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ 2948 TEST(mnemonic##_2S) { \ 2949 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \ 2950 kInputFloatAccDestination, \ 2951 kInputFloat##input); \ 2952 } \ 2953 TEST(mnemonic##_4S) { \ 2954 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \ 2955 kInputFloatAccDestination, \ 2956 kInputFloat##input); \ 2957 } \ 2958 TEST(mnemonic##_2D) { \ 2959 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \ 2960 kInputDoubleAccDestination, \ 2961 kInputDouble##input); \ 2962 } 2963 2964#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ 2965 TEST(mnemonic##_D) { \ 2966 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \ 2967 kInput64bitsAccDestination, \ 2968 kInput64bits##input); \ 2969 } 2970 2971#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ 2972 TEST(mnemonic##_H) { \ 2973 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \ 2974 kInput16bitsAccDestination, \ 2975 kInput16bits##input); \ 2976 } \ 2977 TEST(mnemonic##_S) { \ 2978 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \ 2979 kInput32bitsAccDestination, \ 2980 kInput32bits##input); \ 2981 } \ 2982 2983#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ 2984 TEST(mnemonic##_B) { \ 2985 CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, \ 2986 kInput8bitsAccDestination, \ 2987 kInput8bits##input); \ 2988 } \ 2989 TEST(mnemonic##_H) { \ 2990 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \ 2991 kInput16bitsAccDestination, \ 2992 kInput16bits##input); \ 2993 } \ 2994 TEST(mnemonic##_S) { \ 2995 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \ 2996 kInput32bitsAccDestination, \ 2997 kInput32bits##input); 
\ 2998 } \ 2999 TEST(mnemonic##_D) { \ 3000 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \ 3001 kInput64bitsAccDestination, \ 3002 kInput64bits##input); \ 3003 } 3004 3005#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ 3006 TEST(mnemonic##_S) { \ 3007 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \ 3008 kInputFloatAccDestination, \ 3009 kInputFloat##input); \ 3010 } \ 3011 TEST(mnemonic##_D) { \ 3012 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \ 3013 kInputDoubleAccDestination, \ 3014 kInputDouble##input); \ 3015 } 3016 3017#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3018 vdform, vnform, vmform, \ 3019 input_d, input_n, input_m) { \ 3020 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 3021 vdform, vnform, vmform, \ 3022 input_d, input_n, input_m); \ 3023 } 3024 3025#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3026 TEST(mnemonic##_8H) { \ 3027 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \ 3028 kInput16bitsAccDestination, \ 3029 kInput8bits##input, kInput8bits##input); \ 3030 } \ 3031 TEST(mnemonic##2_8H) { \ 3032 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \ 3033 kInput16bitsAccDestination, \ 3034 kInput8bits##input, kInput8bits##input); \ 3035 } 3036 3037#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3038 TEST(mnemonic##_4S) { \ 3039 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \ 3040 kInput32bitsAccDestination, \ 3041 kInput16bits##input, kInput16bits##input); \ 3042 } \ 3043 TEST(mnemonic##2_4S) { \ 3044 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \ 3045 kInput32bitsAccDestination, \ 3046 kInput16bits##input, kInput16bits##input); \ 3047 } 3048 3049#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ 3050 TEST(mnemonic##_2D) { \ 3051 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \ 3052 kInput64bitsAccDestination, \ 3053 kInput32bits##input, kInput32bits##input); \ 3054 } \ 3055 TEST(mnemonic##2_2D) { \ 3056 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \ 3057 kInput64bitsAccDestination, \ 3058 
kInput32bits##input, kInput32bits##input); \ 3059 } 3060 3061#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ 3062 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3063 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3064 3065#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ 3066 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3067 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3068 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3069 3070#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3071 TEST(mnemonic##_S) { \ 3072 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, \ 3073 kInput32bitsAccDestination, \ 3074 kInput16bits##input, \ 3075 kInput16bits##input); \ 3076 } 3077 3078#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ 3079 TEST(mnemonic##_D) { \ 3080 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, \ 3081 kInput64bitsAccDestination, \ 3082 kInput32bits##input, \ 3083 kInput32bits##input); \ 3084 } 3085 3086#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ 3087 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3088 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) 3089 3090#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ 3091 TEST(mnemonic##_8H) { \ 3092 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \ 3093 kInput16bitsAccDestination, \ 3094 kInput16bits##input, kInput8bits##input); \ 3095 } \ 3096 TEST(mnemonic##_4S) { \ 3097 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \ 3098 kInput32bitsAccDestination, \ 3099 kInput32bits##input, kInput16bits##input); \ 3100 } \ 3101 TEST(mnemonic##_2D) { \ 3102 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \ 3103 kInput64bitsAccDestination, \ 3104 kInput64bits##input, kInput32bits##input); \ 3105 } \ 3106 TEST(mnemonic##2_8H) { \ 3107 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \ 3108 kInput16bitsAccDestination, \ 3109 kInput16bits##input, kInput8bits##input); \ 3110 } \ 3111 TEST(mnemonic##2_4S) { \ 3112 
CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \ 3113 kInput32bitsAccDestination, \ 3114 kInput32bits##input, kInput16bits##input); \ 3115 } \ 3116 TEST(mnemonic##2_2D) { \ 3117 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \ 3118 kInput64bitsAccDestination, \ 3119 kInput64bits##input, kInput32bits##input); \ 3120 } 3121 3122#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ 3123 TEST(mnemonic##_8B) { \ 3124 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \ 3125 kInput8bitsAccDestination, \ 3126 kInput16bits##input, kInput16bits##input); \ 3127 } \ 3128 TEST(mnemonic##_4H) { \ 3129 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \ 3130 kInput16bitsAccDestination, \ 3131 kInput32bits##input, kInput32bits##input); \ 3132 } \ 3133 TEST(mnemonic##_2S) { \ 3134 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \ 3135 kInput32bitsAccDestination, \ 3136 kInput64bits##input, kInput64bits##input); \ 3137 } \ 3138 TEST(mnemonic##2_16B) { \ 3139 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \ 3140 kInput8bitsAccDestination, \ 3141 kInput16bits##input, kInput16bits##input); \ 3142 } \ 3143 TEST(mnemonic##2_8H) { \ 3144 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \ 3145 kInput16bitsAccDestination, \ 3146 kInput32bits##input, kInput32bits##input); \ 3147 } \ 3148 TEST(mnemonic##2_4S) { \ 3149 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \ 3150 kInput32bitsAccDestination, \ 3151 kInput64bits##input, kInput64bits##input); \ 3152 } 3153 3154#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3155 vdform, vnform, \ 3156 input_n, \ 3157 input_imm) { \ 3158 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ 3159 vdform, vnform, \ 3160 input_n, input_imm); \ 3161 } 3162 3163#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ 3164 TEST(mnemonic##_8B_2OPIMM) { \ 3165 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3166 8B, 8B, \ 3167 kInput8bits##input, \ 3168 kInput8bitsImm##input_imm); \ 3169 } \ 3170 TEST(mnemonic##_16B_2OPIMM) { \ 3171 
CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3172 16B, 16B, \ 3173 kInput8bits##input, \ 3174 kInput8bitsImm##input_imm); \ 3175 } \ 3176 TEST(mnemonic##_4H_2OPIMM) { \ 3177 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3178 4H, 4H, \ 3179 kInput16bits##input, \ 3180 kInput16bitsImm##input_imm); \ 3181 } \ 3182 TEST(mnemonic##_8H_2OPIMM) { \ 3183 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3184 8H, 8H, \ 3185 kInput16bits##input, \ 3186 kInput16bitsImm##input_imm); \ 3187 } \ 3188 TEST(mnemonic##_2S_2OPIMM) { \ 3189 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3190 2S, 2S, \ 3191 kInput32bits##input, \ 3192 kInput32bitsImm##input_imm); \ 3193 } \ 3194 TEST(mnemonic##_4S_2OPIMM) { \ 3195 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3196 4S, 4S, \ 3197 kInput32bits##input, \ 3198 kInput32bitsImm##input_imm); \ 3199 } \ 3200 TEST(mnemonic##_2D_2OPIMM) { \ 3201 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3202 2D, 2D, \ 3203 kInput64bits##input, \ 3204 kInput64bitsImm##input_imm); \ 3205 } 3206 3207#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ 3208 TEST(mnemonic##_8B_2OPIMM) { \ 3209 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3210 8B, B, \ 3211 kInput8bits##input, \ 3212 kInput8bitsImm##input_imm); \ 3213 } \ 3214 TEST(mnemonic##_16B_2OPIMM) { \ 3215 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3216 16B, B, \ 3217 kInput8bits##input, \ 3218 kInput8bitsImm##input_imm); \ 3219 } \ 3220 TEST(mnemonic##_4H_2OPIMM) { \ 3221 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3222 4H, H, \ 3223 kInput16bits##input, \ 3224 kInput16bitsImm##input_imm); \ 3225 } \ 3226 TEST(mnemonic##_8H_2OPIMM) { \ 3227 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3228 8H, H, \ 3229 kInput16bits##input, \ 3230 kInput16bitsImm##input_imm); \ 3231 } \ 3232 TEST(mnemonic##_2S_2OPIMM) { \ 3233 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3234 2S, S, \ 3235 kInput32bits##input, \ 3236 kInput32bitsImm##input_imm); \ 3237 } \ 3238 TEST(mnemonic##_4S_2OPIMM) { \ 3239 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3240 4S, S, \ 3241 
kInput32bits##input, \ 3242 kInput32bitsImm##input_imm); \ 3243 } \ 3244 TEST(mnemonic##_2D_2OPIMM) { \ 3245 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3246 2D, D, \ 3247 kInput64bits##input, \ 3248 kInput64bitsImm##input_imm); \ 3249 } 3250 3251#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ 3252 TEST(mnemonic##_8B_2OPIMM) { \ 3253 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3254 8B, 8H, \ 3255 kInput16bits##input, \ 3256 kInput8bitsImm##input_imm); \ 3257 } \ 3258 TEST(mnemonic##_4H_2OPIMM) { \ 3259 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3260 4H, 4S, \ 3261 kInput32bits##input, \ 3262 kInput16bitsImm##input_imm); \ 3263 } \ 3264 TEST(mnemonic##_2S_2OPIMM) { \ 3265 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3266 2S, 2D, \ 3267 kInput64bits##input, \ 3268 kInput32bitsImm##input_imm); \ 3269 } \ 3270 TEST(mnemonic##2_16B_2OPIMM) { \ 3271 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3272 16B, 8H, \ 3273 kInput16bits##input, \ 3274 kInput8bitsImm##input_imm); \ 3275 } \ 3276 TEST(mnemonic##2_8H_2OPIMM) { \ 3277 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3278 8H, 4S, \ 3279 kInput32bits##input, \ 3280 kInput16bitsImm##input_imm); \ 3281 } \ 3282 TEST(mnemonic##2_4S_2OPIMM) { \ 3283 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3284 4S, 2D, \ 3285 kInput64bits##input, \ 3286 kInput32bitsImm##input_imm); \ 3287 } 3288 3289#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ 3290 TEST(mnemonic##_B_2OPIMM) { \ 3291 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3292 B, H, \ 3293 kInput16bits##input, \ 3294 kInput8bitsImm##input_imm); \ 3295 } \ 3296 TEST(mnemonic##_H_2OPIMM) { \ 3297 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3298 H, S, \ 3299 kInput32bits##input, \ 3300 kInput16bitsImm##input_imm); \ 3301 } \ 3302 TEST(mnemonic##_S_2OPIMM) { \ 3303 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3304 S, D, \ 3305 kInput64bits##input, \ 3306 kInput32bitsImm##input_imm); \ 3307 } 3308 3309#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, 
input_imm) \ 3310 TEST(mnemonic##_2S_2OPIMM) { \ 3311 CALL_TEST_NEON_HELPER_2OPIMM( \ 3312 mnemonic, \ 3313 2S, 2S, \ 3314 kInputFloat##Basic, \ 3315 kInputDoubleImm##input_imm) \ 3316 } \ 3317 TEST(mnemonic##_4S_2OPIMM) { \ 3318 CALL_TEST_NEON_HELPER_2OPIMM( \ 3319 mnemonic, \ 3320 4S, 4S, \ 3321 kInputFloat##input, \ 3322 kInputDoubleImm##input_imm); \ 3323 } \ 3324 TEST(mnemonic##_2D_2OPIMM) { \ 3325 CALL_TEST_NEON_HELPER_2OPIMM( \ 3326 mnemonic, \ 3327 2D, 2D, \ 3328 kInputDouble##input, \ 3329 kInputDoubleImm##input_imm); \ 3330 } 3331 3332#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ 3333 TEST(mnemonic##_2S_2OPIMM) { \ 3334 CALL_TEST_NEON_HELPER_2OPIMM( \ 3335 mnemonic, \ 3336 2S, 2S, \ 3337 kInputFloat##Basic, \ 3338 kInput32bitsImm##input_imm) \ 3339 } \ 3340 TEST(mnemonic##_4S_2OPIMM) { \ 3341 CALL_TEST_NEON_HELPER_2OPIMM( \ 3342 mnemonic, \ 3343 4S, 4S, \ 3344 kInputFloat##input, \ 3345 kInput32bitsImm##input_imm) \ 3346 } \ 3347 TEST(mnemonic##_2D_2OPIMM) { \ 3348 CALL_TEST_NEON_HELPER_2OPIMM( \ 3349 mnemonic, \ 3350 2D, 2D, \ 3351 kInputDouble##input, \ 3352 kInput64bitsImm##input_imm) \ 3353 } 3354 3355#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ 3356 TEST(mnemonic##_S_2OPIMM) { \ 3357 CALL_TEST_NEON_HELPER_2OPIMM( \ 3358 mnemonic, \ 3359 S, S, \ 3360 kInputFloat##Basic, \ 3361 kInput32bitsImm##input_imm) \ 3362 } \ 3363 TEST(mnemonic##_D_2OPIMM) { \ 3364 CALL_TEST_NEON_HELPER_2OPIMM( \ 3365 mnemonic, \ 3366 D, D, \ 3367 kInputDouble##input, \ 3368 kInput64bitsImm##input_imm) \ 3369 } 3370 3371#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ 3372 TEST(mnemonic##_2S_2OPIMM) { \ 3373 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3374 2S, 2S, \ 3375 kInput32bits##input, \ 3376 kInput32bitsImm##input_imm); \ 3377 } \ 3378 TEST(mnemonic##_4S_2OPIMM) { \ 3379 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3380 4S, 4S, \ 3381 kInput32bits##input, \ 3382 kInput32bitsImm##input_imm); \ 3383 } \ 3384 
TEST(mnemonic##_2D_2OPIMM) { \ 3385 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3386 2D, 2D, \ 3387 kInput64bits##input, \ 3388 kInput64bitsImm##input_imm); \ 3389 } 3390 3391#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ 3392 TEST(mnemonic##_D_2OPIMM) { \ 3393 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3394 D, D, \ 3395 kInput64bits##input, \ 3396 kInput64bitsImm##input_imm); \ 3397 } 3398 3399#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \ 3400 TEST(mnemonic##_S_2OPIMM) { \ 3401 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3402 S, S, \ 3403 kInput32bits##input, \ 3404 kInput32bitsImm##input_imm); \ 3405 } \ 3406 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) 3407 3408#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ 3409 TEST(mnemonic##_D_2OPIMM) { \ 3410 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3411 D, D, \ 3412 kInputDouble##input, \ 3413 kInputDoubleImm##input_imm); \ 3414 } 3415 3416#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \ 3417 TEST(mnemonic##_S_2OPIMM) { \ 3418 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3419 S, S, \ 3420 kInputFloat##input, \ 3421 kInputDoubleImm##input_imm); \ 3422 } \ 3423 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) 3424 3425#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ 3426 TEST(mnemonic##_B_2OPIMM) { \ 3427 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3428 B, B, \ 3429 kInput8bits##input, \ 3430 kInput8bitsImm##input_imm); \ 3431 } \ 3432 TEST(mnemonic##_H_2OPIMM) { \ 3433 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3434 H, H, \ 3435 kInput16bits##input, \ 3436 kInput16bitsImm##input_imm); \ 3437 } \ 3438 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) 3439 3440#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ 3441 TEST(mnemonic##_8H_2OPIMM) { \ 3442 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3443 8H, 8B, \ 3444 kInput8bits##input, \ 3445 kInput8bitsImm##input_imm); 
\ 3446 } \ 3447 TEST(mnemonic##_4S_2OPIMM) { \ 3448 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3449 4S, 4H, \ 3450 kInput16bits##input, \ 3451 kInput16bitsImm##input_imm); \ 3452 } \ 3453 TEST(mnemonic##_2D_2OPIMM) { \ 3454 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3455 2D, 2S, \ 3456 kInput32bits##input, \ 3457 kInput32bitsImm##input_imm); \ 3458 } \ 3459 TEST(mnemonic##2_8H_2OPIMM) { \ 3460 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3461 8H, 16B, \ 3462 kInput8bits##input, \ 3463 kInput8bitsImm##input_imm); \ 3464 } \ 3465 TEST(mnemonic##2_4S_2OPIMM) { \ 3466 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3467 4S, 8H, \ 3468 kInput16bits##input, \ 3469 kInput16bitsImm##input_imm); \ 3470 } \ 3471 TEST(mnemonic##2_2D_2OPIMM) { \ 3472 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3473 2D, 4S, \ 3474 kInput32bits##input, \ 3475 kInput32bitsImm##input_imm); \ 3476 } 3477 3478#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3479 vdform, vnform, vmform, \ 3480 input_d, input_n, \ 3481 input_m, indices) { \ 3482 CALL_TEST_NEON_HELPER_ByElement(mnemonic, \ 3483 vdform, vnform, vmform, \ 3484 input_d, input_n, \ 3485 input_m, indices); \ 3486 } 3487 3488#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ 3489 TEST(mnemonic##_4H_4H_H) { \ 3490 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3491 4H, 4H, H, \ 3492 kInput16bits##input_d, \ 3493 kInput16bits##input_n, \ 3494 kInput16bits##input_m, \ 3495 kInputHIndices); \ 3496 } \ 3497 TEST(mnemonic##_8H_8H_H) { \ 3498 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3499 8H, 8H, H, \ 3500 kInput16bits##input_d, \ 3501 kInput16bits##input_n, \ 3502 kInput16bits##input_m, \ 3503 kInputHIndices); \ 3504 } \ 3505 TEST(mnemonic##_2S_2S_S) { \ 3506 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3507 2S, 2S, S, \ 3508 kInput32bits##input_d, \ 3509 kInput32bits##input_n, \ 3510 kInput32bits##input_m, \ 3511 kInputSIndices); \ 3512 } \ 3513 TEST(mnemonic##_4S_4S_S) { \ 3514 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3515 4S, 
4S, S, \ 3516 kInput32bits##input_d, \ 3517 kInput32bits##input_n, \ 3518 kInput32bits##input_m, \ 3519 kInputSIndices); \ 3520 } 3521 3522#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, \ 3523 input_d, input_n, input_m) \ 3524 TEST(mnemonic##_H_H_H) { \ 3525 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3526 H, H, H, \ 3527 kInput16bits##input_d, \ 3528 kInput16bits##input_n, \ 3529 kInput16bits##input_m, \ 3530 kInputHIndices); \ 3531 } \ 3532 TEST(mnemonic##_S_S_S) { \ 3533 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3534 S, S, S, \ 3535 kInput32bits##input_d, \ 3536 kInput32bits##input_n, \ 3537 kInput32bits##input_m, \ 3538 kInputSIndices); \ 3539 } 3540 3541#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ 3542 TEST(mnemonic##_2S_2S_S) { \ 3543 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3544 2S, 2S, S, \ 3545 kInputFloat##input_d, \ 3546 kInputFloat##input_n, \ 3547 kInputFloat##input_m, \ 3548 kInputSIndices); \ 3549 } \ 3550 TEST(mnemonic##_4S_4S_S) { \ 3551 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3552 4S, 4S, S, \ 3553 kInputFloat##input_d, \ 3554 kInputFloat##input_n, \ 3555 kInputFloat##input_m, \ 3556 kInputSIndices); \ 3557 } \ 3558 TEST(mnemonic##_2D_2D_D) { \ 3559 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3560 2D, 2D, D, \ 3561 kInputDouble##input_d, \ 3562 kInputDouble##input_n, \ 3563 kInputDouble##input_m, \ 3564 kInputDIndices); \ 3565 } \ 3566 3567#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ 3568 TEST(mnemonic##_S_S_S) { \ 3569 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3570 S, S, S, \ 3571 kInputFloat##inp_d, \ 3572 kInputFloat##inp_n, \ 3573 kInputFloat##inp_m, \ 3574 kInputSIndices); \ 3575 } \ 3576 TEST(mnemonic##_D_D_D) { \ 3577 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3578 D, D, D, \ 3579 kInputDouble##inp_d, \ 3580 kInputDouble##inp_n, \ 3581 kInputDouble##inp_m, \ 3582 kInputDIndices); \ 3583 } \ 3584 3585 3586#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, 
input_d, input_n, input_m) \ 3587 TEST(mnemonic##_4S_4H_H) { \ 3588 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3589 4S, 4H, H, \ 3590 kInput32bits##input_d, \ 3591 kInput16bits##input_n, \ 3592 kInput16bits##input_m, \ 3593 kInputHIndices); \ 3594 } \ 3595 TEST(mnemonic##2_4S_8H_H) { \ 3596 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 3597 4S, 8H, H, \ 3598 kInput32bits##input_d, \ 3599 kInput16bits##input_n, \ 3600 kInput16bits##input_m, \ 3601 kInputHIndices); \ 3602 } \ 3603 TEST(mnemonic##_2D_2S_S) { \ 3604 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3605 2D, 2S, S, \ 3606 kInput64bits##input_d, \ 3607 kInput32bits##input_n, \ 3608 kInput32bits##input_m, \ 3609 kInputSIndices); \ 3610 } \ 3611 TEST(mnemonic##2_2D_4S_S) { \ 3612 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 3613 2D, 4S, S, \ 3614 kInput64bits##input_d, \ 3615 kInput32bits##input_n, \ 3616 kInput32bits##input_m, \ 3617 kInputSIndices); \ 3618 } 3619 3620#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, \ 3621 input_d, input_n, input_m) \ 3622 TEST(mnemonic##_S_H_H) { \ 3623 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3624 S, H, H, \ 3625 kInput32bits##input_d, \ 3626 kInput16bits##input_n, \ 3627 kInput16bits##input_m, \ 3628 kInputHIndices); \ 3629 } \ 3630 TEST(mnemonic##_D_S_S) { \ 3631 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3632 D, S, S, \ 3633 kInput64bits##input_d, \ 3634 kInput32bits##input_n, \ 3635 kInput32bits##input_m, \ 3636 kInputSIndices); \ 3637 } 3638 3639 3640#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3641 variant, \ 3642 input_d, \ 3643 input_imm1, \ 3644 input_n, \ 3645 input_imm2) { \ 3646 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \ 3647 mnemonic, \ 3648 variant, variant, \ 3649 input_d, input_imm1, \ 3650 input_n, input_imm2); \ 3651 } 3652 3653#define DEFINE_TEST_NEON_2OP2IMM(mnemonic, \ 3654 input_d, input_imm1, \ 3655 input_n, input_imm2) \ 3656 TEST(mnemonic##_B) { \ 3657 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3658 16B, \ 3659 
kInput8bits##input_d, \ 3660 kInput8bitsImm##input_imm1, \ 3661 kInput8bits##input_n, \ 3662 kInput8bitsImm##input_imm2); \ 3663 } \ 3664 TEST(mnemonic##_H) { \ 3665 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3666 8H, \ 3667 kInput16bits##input_d, \ 3668 kInput16bitsImm##input_imm1, \ 3669 kInput16bits##input_n, \ 3670 kInput16bitsImm##input_imm2); \ 3671 } \ 3672 TEST(mnemonic##_S) { \ 3673 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3674 4S, \ 3675 kInput32bits##input_d, \ 3676 kInput32bitsImm##input_imm1, \ 3677 kInput32bits##input_n, \ 3678 kInput32bitsImm##input_imm2); \ 3679 } \ 3680 TEST(mnemonic##_D) { \ 3681 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 3682 2D, \ 3683 kInput64bits##input_d, \ 3684 kInput64bitsImm##input_imm1, \ 3685 kInput64bits##input_n, \ 3686 kInput64bitsImm##input_imm2); \ 3687 } 3688 3689 3690// Advanced SIMD copy. 3691DEFINE_TEST_NEON_2OP2IMM(ins, 3692 Basic, LaneCountFromZero, 3693 Basic, LaneCountFromZero) 3694DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) 3695 3696 3697// Advanced SIMD scalar copy. 3698DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) 3699 3700 3701// Advanced SIMD three same. 
// Each DEFINE_TEST_NEON_3SAME* invocation below expands (via the TEST macro)
// into one simulator-vs-hardware trace test per vector arrangement for the
// named "three same" instruction. Suffix key (see the macro definitions
// above): plain 3SAME = 8B/16B/4H/8H/2S/4S/2D; _NO2D = the same minus 2D;
// _HS = H and S lane sizes only; _8B_16B = byte arrangements only (logical
// ops); _FP = 2S/4S float and 2D double inputs.
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
// "and" is a C++ alternative token, so the MacroAssembler method is "and_".
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)


// Advanced SIMD scalar three same.
3782DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic) 3783DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic) 3784DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic) 3785DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic) 3786DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic) 3787DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic) 3788DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic) 3789DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic) 3790DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic) 3791DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic) 3792DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic) 3793DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic) 3794DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic) 3795DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic) 3796DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic) 3797DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic) 3798DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic) 3799DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic) 3800DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic) 3801DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic) 3802DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic) 3803DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic) 3804DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic) 3805DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic) 3806DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic) 3807DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic) 3808DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic) 3809DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic) 3810DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic) 3811DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic) 3812DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic) 3813 3814 3815// Advanced SIMD three different. 
3816DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic) 3817DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic) 3818DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic) 3819DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic) 3820DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic) 3821DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic) 3822DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic) 3823DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic) 3824DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic) 3825DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic) 3826DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic) 3827DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic) 3828DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic) 3829DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic) 3830DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic) 3831DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic) 3832DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic) 3833DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic) 3834DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic) 3835DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic) 3836DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic) 3837DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic) 3838DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic) 3839DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic) 3840DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic) 3841DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic) 3842 3843 3844// Advanced SIMD scalar three different. 3845DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic) 3846DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic) 3847DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic) 3848 3849 3850// Advanced SIMD scalar pairwise. 3851TEST(addp_SCALAR) { 3852 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic); 3853} 3854DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic) 3855DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic) 3856DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic) 3857DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic) 3858DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic) 3859 3860 3861// Advanced SIMD shift by immediate. 
// The third macro argument selects the immediate (shift amount) range:
// TypeWidth for right shifts (1..width), TypeWidthFromZero for left shifts
// (0..width-1), and TypeWidthFromZeroToWidth for fixed-point conversions.
// TODO(review): confirm the exact ranges against the macro definitions.
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \
                           TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \
                           TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD scalar shift by immediate.
// Scalar counterparts of the vector shift-by-immediate tests above; the same
// immediate-range selectors (TypeWidth / TypeWidthFromZero /
// TypeWidthFromZeroToWidth) are used.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \
                                  TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \
                                  TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD two-register miscellaneous.
// Two-register miscellaneous (mostly the signed/first half of the encoding
// table). Compare-against-zero forms reuse the 2OPIMM macros with a fixed
// Zero immediate.
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
// Two-register miscellaneous, continued (mostly the unsigned/second half).
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
// `not_` has a trailing underscore because `not` is a reserved alternative
// token in C++.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)


// Advanced SIMD scalar two-register miscellaneous.
// Scalar counterparts of the two-register miscellaneous tests above.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// FCVTXN (scalar) is written out by hand: it is exercised only in its S <- D
// form, on the double-precision conversion input list.
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)


// Advanced SIMD across lanes.
// Horizontal reductions over all lanes of a vector; _LONG variants produce a
// widened scalar result.
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)


// Advanced SIMD permute.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)


// Advanced SIMD vector x indexed element.
// By-element (indexed) forms: the three trailing arguments select the input
// lists for the two vector operands and the indexed element respectively.
// TODO(review): confirm argument roles against the macro definitions.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)


// Advanced SIMD scalar x indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)

}  // namespace vixl