1// Copyright 2015, ARM Limited
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <stdio.h>
28#include <float.h>
29
30#include "test-runner.h"
31#include "test-utils-a64.h"
32#include "test-simulator-inputs-a64.h"
33#include "test-simulator-traces-a64.h"
34#include "vixl/a64/macro-assembler-a64.h"
35#include "vixl/a64/simulator-a64.h"
36
37namespace vixl {
38
39// ==== Simulator Tests ====
40//
41// These simulator tests check instruction behaviour against a trace taken from
42// real AArch64 hardware. The same test code is used to generate the trace; the
43// results are printed to stdout when the test is run with --sim_test_trace.
44//
45// The input lists and expected results are stored in test/traces. The expected
46// results can be regenerated using tools/generate_simulator_traces.py. Adding
47// a test for a new instruction is described at the top of
48// test-simulator-traces-a64.h.
49
// Shorthand used when emitting code: `__ Foo(...)` expands to `masm.Foo(...)`.
#define __ masm.
// Forwards to the runner's TEST_ macro, prefixing the test name with `SIM_`.
#define TEST(name) TEST_(SIM_##name)

// Size argument passed to the MacroAssembler constructor by SETUP().
#define BUF_SIZE (256)
54
#ifdef USE_SIMULATOR

// Simulator build: generated code is run through a Simulator, or through a
// Debugger when Test::run_debugger() is set.

// Declares `masm`, `decoder` and a heap-allocated `simulator`, then applies the
// runner's coloured-trace and instruction-stats settings. Must be paired with
// TEARDOWN(), which deletes the simulator.
#define SETUP()                                                               \
  MacroAssembler masm(BUF_SIZE);                                              \
  Decoder decoder;                                                            \
  Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder)        \
                                              : new Simulator(&decoder);      \
  simulator->set_coloured_trace(Test::coloured_trace());                      \
  simulator->set_instruction_stats(Test::instruction_stats());

// Resets the assembler and the simulator state, saves the callee-saved
// registers, and enables whichever trace/instrumentation options the runner
// requested.
#define START()                                                               \
  masm.Reset();                                                               \
  simulator->ResetState();                                                    \
  __ PushCalleeSavedRegisters();                                              \
  if (Test::trace_reg()) {                                                    \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                        \
  }                                                                           \
  if (Test::trace_write()) {                                                  \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                        \
  }                                                                           \
  if (Test::trace_sim()) {                                                    \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                       \
  }                                                                           \
  if (Test::instruction_stats()) {                                            \
    __ EnableInstrumentation();                                               \
  }

// Undoes START(): disables instrumentation and all tracing, restores the
// callee-saved registers, emits the return and finalizes the code buffer.
#define END()                                                                 \
  if (Test::instruction_stats()) {                                            \
    __ DisableInstrumentation();                                              \
  }                                                                           \
  __ Trace(LOG_ALL, TRACE_DISABLE);                                           \
  __ PopCalleeSavedRegisters();                                               \
  __ Ret();                                                                   \
  masm.FinalizeCode()

// Runs the generated code on the simulator, from the start of the buffer.
#define RUN()                                                                 \
  simulator->RunFrom(masm.GetStartAddress<Instruction*>())

// Releases the simulator allocated by SETUP().
#define TEARDOWN()                                                            \
  delete simulator;

#else     // USE_SIMULATOR

// Native build: generated code is called directly.

// Declares `masm` and performs the CPU set-up step.
#define SETUP()                                                               \
  MacroAssembler masm(BUF_SIZE);                                              \
  CPU::SetUp()

// Resets the assembler and saves the callee-saved registers.
#define START()                                                               \
  masm.Reset();                                                               \
  __ PushCalleeSavedRegisters()

// Restores the callee-saved registers, emits the return and finalizes the
// code buffer.
#define END()                                                                 \
  __ PopCalleeSavedRegisters();                                               \
  __ Ret();                                                                   \
  masm.FinalizeCode()

// Makes the instruction and data caches coherent over the generated code and
// then calls it as a `void (*)(void)`. The buffer address is converted to a
// function pointer with memcpy rather than a cast; the static assert checks
// that the two pointer types have the same size.
#define RUN()                                                                  \
  {                                                                            \
    byte* buffer_start = masm.GetStartAddress<byte*>();                        \
    size_t buffer_length = masm.CursorOffset();                                \
    void (*test_function)(void);                                               \
                                                                               \
    CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length);               \
    VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function));         \
    memcpy(&test_function, &buffer_start, sizeof(buffer_start));               \
    test_function();                                                           \
  }

// Nothing to release in the native build.
#define TEARDOWN()

#endif    // USE_SIMULATOR
127
128
// Upper bound on the number of mismatches that each test prints in full; any
// further mismatches are only counted.
static const unsigned kErrorReportLimit = 8u;
131
132
133// Overloaded versions of rawbits_to_double and rawbits_to_float for use in the
134// templated test functions.
135static float rawbits_to_fp(uint32_t bits) {
136  return rawbits_to_float(bits);
137}
138
139static double rawbits_to_fp(uint64_t bits) {
140  return rawbits_to_double(bits);
141}
142
143
144// MacroAssembler member function pointers to pass to the test dispatchers.
145typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
146                                                  const FPRegister& fn);
147typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
148                                                  const FPRegister& fn,
149                                                  const FPRegister& fm);
150typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
151                                                  const FPRegister& fn,
152                                                  const FPRegister& fm,
153                                                  const FPRegister& fa);
154typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
155                                                  const FPRegister& fm);
156typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
157                                                      double value);
158typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
159                                                    const FPRegister& fn);
160typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
161                                                      const FPRegister& fn,
162                                                      int fbits);
163typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
164                                                      const Register& rn,
165                                                      int fbits);
166// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
167//       consolidated into one routine.
168typedef void (MacroAssembler::*Test1OpNEONHelper_t)(
169  const VRegister& vd, const VRegister& vn);
170typedef void (MacroAssembler::*Test2OpNEONHelper_t)(
171  const VRegister& vd, const VRegister& vn, const VRegister& vm);
172typedef void (MacroAssembler::*TestByElementNEONHelper_t)(
173  const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index);
174typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
175  const VRegister& vd, int imm1, const VRegister& vn, int imm2);
176
177// This helps using the same typename for both the function pointer
178// and the array of immediates passed to helper routines.
179template <typename T>
180class Test2OpImmediateNEONHelper_t {
181 public:
182    typedef void (MacroAssembler::*mnemonic)(
183      const VRegister& vd, const VRegister& vn, T imm);
184};
185
186
187// Standard test dispatchers.
188
189
190static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs,
191                           unsigned inputs_length, uintptr_t results,
192                           unsigned d_size, unsigned n_size) {
193  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
194  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
195
196  SETUP();
197  START();
198
199  // Roll up the loop to keep the code size down.
200  Label loop_n;
201
202  Register out = x0;
203  Register inputs_base = x1;
204  Register length = w2;
205  Register index_n = w3;
206
207  const int n_index_shift =
208      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
209
210  FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
211  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
212
213  __ Mov(out, results);
214  __ Mov(inputs_base, inputs);
215  __ Mov(length, inputs_length);
216
217  __ Mov(index_n, 0);
218  __ Bind(&loop_n);
219  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
220
221  {
222    SingleEmissionCheckScope guard(&masm);
223    (masm.*helper)(fd, fn);
224  }
225  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
226
227  __ Add(index_n, index_n, 1);
228  __ Cmp(index_n, inputs_length);
229  __ B(lo, &loop_n);
230
231  END();
232  RUN();
233  TEARDOWN();
234}
235
236
237// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
238// rawbits representations of doubles or floats. This ensures that exact bit
239// comparisons can be performed.
240template <typename Tn, typename Td>
241static void Test1Op(const char * name, Test1OpFPHelper_t helper,
242                    const Tn inputs[], unsigned inputs_length,
243                    const Td expected[], unsigned expected_length) {
244  VIXL_ASSERT(inputs_length > 0);
245
246  const unsigned results_length = inputs_length;
247  Td * results = new Td[results_length];
248
249  const unsigned d_bits = sizeof(Td) * 8;
250  const unsigned n_bits = sizeof(Tn) * 8;
251
252  Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
253                 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
254
255  if (Test::sim_test_trace()) {
256    // Print the results.
257    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
258    for (unsigned d = 0; d < results_length; d++) {
259      printf("  0x%0*" PRIx64 ",\n",
260             d_bits / 4, static_cast<uint64_t>(results[d]));
261    }
262    printf("};\n");
263    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
264  } else {
265    // Check the results.
266    VIXL_CHECK(expected_length == results_length);
267    unsigned error_count = 0;
268    unsigned d = 0;
269    for (unsigned n = 0; n < inputs_length; n++, d++) {
270      if (results[d] != expected[d]) {
271        if (++error_count > kErrorReportLimit) continue;
272
273        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
274               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
275               name, rawbits_to_fp(inputs[n]));
276        printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
277               d_bits / 4, static_cast<uint64_t>(expected[d]),
278               rawbits_to_fp(expected[d]));
279        printf("  Found:    0x%0*" PRIx64 " (%g)\n",
280               d_bits / 4, static_cast<uint64_t>(results[d]),
281               rawbits_to_fp(results[d]));
282        printf("\n");
283      }
284    }
285    VIXL_ASSERT(d == expected_length);
286    if (error_count > kErrorReportLimit) {
287      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
288    }
289    VIXL_CHECK(error_count == 0);
290  }
291  delete[] results;
292}
293
294
295static void Test2Op_Helper(Test2OpFPHelper_t helper,
296                           uintptr_t inputs, unsigned inputs_length,
297                           uintptr_t results, unsigned reg_size) {
298  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
299
300  SETUP();
301  START();
302
303  // Roll up the loop to keep the code size down.
304  Label loop_n, loop_m;
305
306  Register out = x0;
307  Register inputs_base = x1;
308  Register length = w2;
309  Register index_n = w3;
310  Register index_m = w4;
311
312  bool double_op = reg_size == kDRegSize;
313  const int index_shift =
314      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
315
316  FPRegister fd = double_op ? d0 : s0;
317  FPRegister fn = double_op ? d1 : s1;
318  FPRegister fm = double_op ? d2 : s2;
319
320  __ Mov(out, results);
321  __ Mov(inputs_base, inputs);
322  __ Mov(length, inputs_length);
323
324  __ Mov(index_n, 0);
325  __ Bind(&loop_n);
326  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
327
328  __ Mov(index_m, 0);
329  __ Bind(&loop_m);
330  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
331
332  {
333    SingleEmissionCheckScope guard(&masm);
334    (masm.*helper)(fd, fn, fm);
335  }
336    __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
337
338  __ Add(index_m, index_m, 1);
339  __ Cmp(index_m, inputs_length);
340  __ B(lo, &loop_m);
341
342  __ Add(index_n, index_n, 1);
343  __ Cmp(index_n, inputs_length);
344  __ B(lo, &loop_n);
345
346  END();
347  RUN();
348  TEARDOWN();
349}
350
351
352// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
353// rawbits representations of doubles or floats. This ensures that exact bit
354// comparisons can be performed.
355template <typename T>
356static void Test2Op(const char * name, Test2OpFPHelper_t helper,
357                    const T inputs[], unsigned inputs_length,
358                    const T expected[], unsigned expected_length) {
359  VIXL_ASSERT(inputs_length > 0);
360
361  const unsigned results_length = inputs_length * inputs_length;
362  T * results = new T[results_length];
363
364  const unsigned bits = sizeof(T) * 8;
365
366  Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
367                 reinterpret_cast<uintptr_t>(results), bits);
368
369  if (Test::sim_test_trace()) {
370    // Print the results.
371    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
372    for (unsigned d = 0; d < results_length; d++) {
373      printf("  0x%0*" PRIx64 ",\n",
374             bits / 4, static_cast<uint64_t>(results[d]));
375    }
376    printf("};\n");
377    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
378  } else {
379    // Check the results.
380    VIXL_CHECK(expected_length == results_length);
381    unsigned error_count = 0;
382    unsigned d = 0;
383    for (unsigned n = 0; n < inputs_length; n++) {
384      for (unsigned m = 0; m < inputs_length; m++, d++) {
385        if (results[d] != expected[d]) {
386          if (++error_count > kErrorReportLimit) continue;
387
388          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
389                 name,
390                 bits / 4, static_cast<uint64_t>(inputs[n]),
391                 bits / 4, static_cast<uint64_t>(inputs[m]),
392                 name,
393                 rawbits_to_fp(inputs[n]),
394                 rawbits_to_fp(inputs[m]));
395          printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
396                 bits / 4, static_cast<uint64_t>(expected[d]),
397                 rawbits_to_fp(expected[d]));
398          printf("  Found:    0x%0*" PRIx64 " (%g)\n",
399                 bits / 4, static_cast<uint64_t>(results[d]),
400                 rawbits_to_fp(results[d]));
401          printf("\n");
402        }
403      }
404    }
405    VIXL_ASSERT(d == expected_length);
406    if (error_count > kErrorReportLimit) {
407      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
408    }
409    VIXL_CHECK(error_count == 0);
410  }
411  delete[] results;
412}
413
414
415static void Test3Op_Helper(Test3OpFPHelper_t helper,
416                           uintptr_t inputs, unsigned inputs_length,
417                           uintptr_t results, unsigned reg_size) {
418  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
419
420  SETUP();
421  START();
422
423  // Roll up the loop to keep the code size down.
424  Label loop_n, loop_m, loop_a;
425
426  Register out = x0;
427  Register inputs_base = x1;
428  Register length = w2;
429  Register index_n = w3;
430  Register index_m = w4;
431  Register index_a = w5;
432
433  bool double_op = reg_size == kDRegSize;
434  const int index_shift =
435      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
436
437  FPRegister fd = double_op ? d0 : s0;
438  FPRegister fn = double_op ? d1 : s1;
439  FPRegister fm = double_op ? d2 : s2;
440  FPRegister fa = double_op ? d3 : s3;
441
442  __ Mov(out, results);
443  __ Mov(inputs_base, inputs);
444  __ Mov(length, inputs_length);
445
446  __ Mov(index_n, 0);
447  __ Bind(&loop_n);
448  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
449
450  __ Mov(index_m, 0);
451  __ Bind(&loop_m);
452  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
453
454  __ Mov(index_a, 0);
455  __ Bind(&loop_a);
456  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
457
458  {
459    SingleEmissionCheckScope guard(&masm);
460    (masm.*helper)(fd, fn, fm, fa);
461  }
462  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
463
464  __ Add(index_a, index_a, 1);
465  __ Cmp(index_a, inputs_length);
466  __ B(lo, &loop_a);
467
468  __ Add(index_m, index_m, 1);
469  __ Cmp(index_m, inputs_length);
470  __ B(lo, &loop_m);
471
472  __ Add(index_n, index_n, 1);
473  __ Cmp(index_n, inputs_length);
474  __ B(lo, &loop_n);
475
476  END();
477  RUN();
478  TEARDOWN();
479}
480
481
482// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
483// rawbits representations of doubles or floats. This ensures that exact bit
484// comparisons can be performed.
485template <typename T>
486static void Test3Op(const char * name, Test3OpFPHelper_t helper,
487                    const T inputs[], unsigned inputs_length,
488                    const T expected[], unsigned expected_length) {
489  VIXL_ASSERT(inputs_length > 0);
490
491  const unsigned results_length = inputs_length * inputs_length * inputs_length;
492  T * results = new T[results_length];
493
494  const unsigned bits = sizeof(T) * 8;
495
496  Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
497                 reinterpret_cast<uintptr_t>(results), bits);
498
499  if (Test::sim_test_trace()) {
500    // Print the results.
501    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
502    for (unsigned d = 0; d < results_length; d++) {
503      printf("  0x%0*" PRIx64 ",\n",
504             bits / 4, static_cast<uint64_t>(results[d]));
505    }
506    printf("};\n");
507    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
508  } else {
509    // Check the results.
510    VIXL_CHECK(expected_length == results_length);
511    unsigned error_count = 0;
512    unsigned d = 0;
513    for (unsigned n = 0; n < inputs_length; n++) {
514      for (unsigned m = 0; m < inputs_length; m++) {
515        for (unsigned a = 0; a < inputs_length; a++, d++) {
516          if (results[d] != expected[d]) {
517            if (++error_count > kErrorReportLimit) continue;
518
519            printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
520                   " (%s %g %g %g):\n",
521                   name,
522                   bits / 4, static_cast<uint64_t>(inputs[n]),
523                   bits / 4, static_cast<uint64_t>(inputs[m]),
524                   bits / 4, static_cast<uint64_t>(inputs[a]),
525                   name,
526                   rawbits_to_fp(inputs[n]),
527                   rawbits_to_fp(inputs[m]),
528                   rawbits_to_fp(inputs[a]));
529            printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
530                   bits / 4, static_cast<uint64_t>(expected[d]),
531                   rawbits_to_fp(expected[d]));
532            printf("  Found:    0x%0*" PRIx64 " (%g)\n",
533                   bits / 4, static_cast<uint64_t>(results[d]),
534                   rawbits_to_fp(results[d]));
535            printf("\n");
536          }
537        }
538      }
539    }
540    VIXL_ASSERT(d == expected_length);
541    if (error_count > kErrorReportLimit) {
542      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
543    }
544    VIXL_CHECK(error_count == 0);
545  }
546  delete[] results;
547}
548
549
550static void TestCmp_Helper(TestFPCmpHelper_t helper,
551                           uintptr_t inputs, unsigned inputs_length,
552                           uintptr_t results, unsigned reg_size) {
553  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
554
555  SETUP();
556  START();
557
558  // Roll up the loop to keep the code size down.
559  Label loop_n, loop_m;
560
561  Register out = x0;
562  Register inputs_base = x1;
563  Register length = w2;
564  Register index_n = w3;
565  Register index_m = w4;
566  Register flags = x5;
567
568  bool double_op = reg_size == kDRegSize;
569  const int index_shift =
570      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
571
572  FPRegister fn = double_op ? d1 : s1;
573  FPRegister fm = double_op ? d2 : s2;
574
575  __ Mov(out, results);
576  __ Mov(inputs_base, inputs);
577  __ Mov(length, inputs_length);
578
579  __ Mov(index_n, 0);
580  __ Bind(&loop_n);
581  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
582
583  __ Mov(index_m, 0);
584  __ Bind(&loop_m);
585  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
586
587  {
588    SingleEmissionCheckScope guard(&masm);
589    (masm.*helper)(fn, fm);
590  }
591  __ Mrs(flags, NZCV);
592  __ Ubfx(flags, flags, 28, 4);
593  __ Strb(flags, MemOperand(out, 1, PostIndex));
594
595  __ Add(index_m, index_m, 1);
596  __ Cmp(index_m, inputs_length);
597  __ B(lo, &loop_m);
598
599  __ Add(index_n, index_n, 1);
600  __ Cmp(index_n, inputs_length);
601  __ B(lo, &loop_n);
602
603  END();
604  RUN();
605  TEARDOWN();
606}
607
608
609// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
610// rawbits representations of doubles or floats. This ensures that exact bit
611// comparisons can be performed.
612template <typename T>
613static void TestCmp(const char * name, TestFPCmpHelper_t helper,
614                    const T inputs[], unsigned inputs_length,
615                    const uint8_t expected[], unsigned expected_length) {
616  VIXL_ASSERT(inputs_length > 0);
617
618  const unsigned results_length = inputs_length * inputs_length;
619  uint8_t * results = new uint8_t[results_length];
620
621  const unsigned bits = sizeof(T) * 8;
622
623  TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
624                 reinterpret_cast<uintptr_t>(results), bits);
625
626  if (Test::sim_test_trace()) {
627    // Print the results.
628    printf("const uint8_t kExpected_%s[] = {\n", name);
629    for (unsigned d = 0; d < results_length; d++) {
630      // Each NZCV result only requires 4 bits.
631      VIXL_ASSERT((results[d] & 0xf) == results[d]);
632      printf("  0x%" PRIx8 ",\n", results[d]);
633    }
634    printf("};\n");
635    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
636  } else {
637    // Check the results.
638    VIXL_CHECK(expected_length == results_length);
639    unsigned error_count = 0;
640    unsigned d = 0;
641    for (unsigned n = 0; n < inputs_length; n++) {
642      for (unsigned m = 0; m < inputs_length; m++, d++) {
643        if (results[d] != expected[d]) {
644          if (++error_count > kErrorReportLimit) continue;
645
646          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
647                 name,
648                 bits / 4, static_cast<uint64_t>(inputs[n]),
649                 bits / 4, static_cast<uint64_t>(inputs[m]),
650                 name,
651                 rawbits_to_fp(inputs[n]),
652                 rawbits_to_fp(inputs[m]));
653          printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
654                 (expected[d] & 0x8) ? 'N' : 'n',
655                 (expected[d] & 0x4) ? 'Z' : 'z',
656                 (expected[d] & 0x2) ? 'C' : 'c',
657                 (expected[d] & 0x1) ? 'V' : 'v',
658                 expected[d]);
659          printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
660                 (results[d] & 0x8) ? 'N' : 'n',
661                 (results[d] & 0x4) ? 'Z' : 'z',
662                 (results[d] & 0x2) ? 'C' : 'c',
663                 (results[d] & 0x1) ? 'V' : 'v',
664                 results[d]);
665          printf("\n");
666        }
667      }
668    }
669    VIXL_ASSERT(d == expected_length);
670    if (error_count > kErrorReportLimit) {
671      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
672    }
673    VIXL_CHECK(error_count == 0);
674  }
675  delete[] results;
676}
677
678
679static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
680                               uintptr_t inputs, unsigned inputs_length,
681                               uintptr_t results, unsigned reg_size) {
682  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
683
684  SETUP();
685  START();
686
687  // Roll up the loop to keep the code size down.
688  Label loop_n, loop_m;
689
690  Register out = x0;
691  Register inputs_base = x1;
692  Register length = w2;
693  Register index_n = w3;
694  Register flags = x4;
695
696  bool double_op = reg_size == kDRegSize;
697  const int index_shift =
698      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
699
700  FPRegister fn = double_op ? d1 : s1;
701
702  __ Mov(out, results);
703  __ Mov(inputs_base, inputs);
704  __ Mov(length, inputs_length);
705
706  __ Mov(index_n, 0);
707  __ Bind(&loop_n);
708  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
709
710  {
711    SingleEmissionCheckScope guard(&masm);
712    (masm.*helper)(fn, 0.0);
713  }
714  __ Mrs(flags, NZCV);
715  __ Ubfx(flags, flags, 28, 4);
716  __ Strb(flags, MemOperand(out, 1, PostIndex));
717
718  __ Add(index_n, index_n, 1);
719  __ Cmp(index_n, inputs_length);
720  __ B(lo, &loop_n);
721
722  END();
723  RUN();
724  TEARDOWN();
725}
726
727
728// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
729// rawbits representations of doubles or floats. This ensures that exact bit
730// comparisons can be performed.
731template <typename T>
732static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper,
733                        const T inputs[], unsigned inputs_length,
734                        const uint8_t expected[], unsigned expected_length) {
735  VIXL_ASSERT(inputs_length > 0);
736
737  const unsigned results_length = inputs_length;
738  uint8_t * results = new uint8_t[results_length];
739
740  const unsigned bits = sizeof(T) * 8;
741
742  TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
743                     reinterpret_cast<uintptr_t>(results), bits);
744
745  if (Test::sim_test_trace()) {
746    // Print the results.
747    printf("const uint8_t kExpected_%s[] = {\n", name);
748    for (unsigned d = 0; d < results_length; d++) {
749      // Each NZCV result only requires 4 bits.
750      VIXL_ASSERT((results[d] & 0xf) == results[d]);
751      printf("  0x%" PRIx8 ",\n", results[d]);
752    }
753    printf("};\n");
754    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
755  } else {
756    // Check the results.
757    VIXL_CHECK(expected_length == results_length);
758    unsigned error_count = 0;
759    unsigned d = 0;
760    for (unsigned n = 0; n < inputs_length; n++, d++) {
761      if (results[d] != expected[d]) {
762        if (++error_count > kErrorReportLimit) continue;
763
764        printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
765               name,
766               bits / 4, static_cast<uint64_t>(inputs[n]),
767               bits / 4, 0,
768               name,
769               rawbits_to_fp(inputs[n]));
770        printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
771               (expected[d] & 0x8) ? 'N' : 'n',
772               (expected[d] & 0x4) ? 'Z' : 'z',
773               (expected[d] & 0x2) ? 'C' : 'c',
774               (expected[d] & 0x1) ? 'V' : 'v',
775               expected[d]);
776        printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
777               (results[d] & 0x8) ? 'N' : 'n',
778               (results[d] & 0x4) ? 'Z' : 'z',
779               (results[d] & 0x2) ? 'C' : 'c',
780               (results[d] & 0x1) ? 'V' : 'v',
781               results[d]);
782        printf("\n");
783      }
784    }
785    VIXL_ASSERT(d == expected_length);
786    if (error_count > kErrorReportLimit) {
787      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
788    }
789    VIXL_CHECK(error_count == 0);
790  }
791  delete[] results;
792}
793
794
795static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
796                                 uintptr_t inputs, unsigned inputs_length,
797                                 uintptr_t results,
798                                 unsigned d_size, unsigned n_size) {
799  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
800  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
801
802  SETUP();
803  START();
804
805  // Roll up the loop to keep the code size down.
806  Label loop_n;
807
808  Register out = x0;
809  Register inputs_base = x1;
810  Register length = w2;
811  Register index_n = w3;
812
813  const int n_index_shift =
814      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
815
816  Register rd = (d_size == kXRegSize) ? x10 : w10;
817  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
818
819  __ Mov(out, results);
820  __ Mov(inputs_base, inputs);
821  __ Mov(length, inputs_length);
822
823  __ Mov(index_n, 0);
824  __ Bind(&loop_n);
825  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
826
827  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
828    {
829      SingleEmissionCheckScope guard(&masm);
830      (masm.*helper)(rd, fn, fbits);
831    }
832    __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
833  }
834
835  __ Add(index_n, index_n, 1);
836  __ Cmp(index_n, inputs_length);
837  __ B(lo, &loop_n);
838
839  END();
840  RUN();
841  TEARDOWN();
842}
843
844
845static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs,
846                               unsigned inputs_length, uintptr_t results,
847                               unsigned d_size, unsigned n_size) {
848  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
849  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
850
851  SETUP();
852  START();
853
854  // Roll up the loop to keep the code size down.
855  Label loop_n;
856
857  Register out = x0;
858  Register inputs_base = x1;
859  Register length = w2;
860  Register index_n = w3;
861
862  const int n_index_shift =
863      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
864
865  Register rd = (d_size == kXRegSize) ? x10 : w10;
866  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
867
868  __ Mov(out, results);
869  __ Mov(inputs_base, inputs);
870  __ Mov(length, inputs_length);
871
872  __ Mov(index_n, 0);
873  __ Bind(&loop_n);
874  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
875
876  {
877    SingleEmissionCheckScope guard(&masm);
878    (masm.*helper)(rd, fn);
879  }
880  __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
881
882  __ Add(index_n, index_n, 1);
883  __ Cmp(index_n, inputs_length);
884  __ B(lo, &loop_n);
885
886  END();
887  RUN();
888  TEARDOWN();
889}
890
891
892// Test FP instructions.
893//  - The inputs[] array should be an array of rawbits representations of
894//    doubles or floats. This ensures that exact bit comparisons can be
895//    performed.
896//  - The expected[] array should be an array of signed integers.
897template <typename Tn, typename Td>
898static void TestFPToS(const char * name, TestFPToIntHelper_t helper,
899                      const Tn inputs[], unsigned inputs_length,
900                      const Td expected[], unsigned expected_length) {
901  VIXL_ASSERT(inputs_length > 0);
902
903  const unsigned results_length = inputs_length;
904  Td * results = new Td[results_length];
905
906  const unsigned d_bits = sizeof(Td) * 8;
907  const unsigned n_bits = sizeof(Tn) * 8;
908
909  TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
910                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
911
912  if (Test::sim_test_trace()) {
913    // Print the results.
914    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
915    // There is no simple C++ literal for INT*_MIN that doesn't produce
916    // warnings, so we use an appropriate constant in that case instead.
917    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
918    // the like) avoids warnings about comparing values with differing ranges.
919    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
920    const int64_t int_d_min = -(int_d_max) - 1;
921    for (unsigned d = 0; d < results_length; d++) {
922      if (results[d] == int_d_min) {
923        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
924      } else {
925        // Some constants (such as those between INT32_MAX and UINT32_MAX)
926        // trigger compiler warnings. To avoid these warnings, use an
927        // appropriate macro to make the type explicit.
928        int64_t result_int64 = static_cast<int64_t>(results[d]);
929        if (result_int64 >= 0) {
930          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
931        } else {
932          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
933        }
934      }
935    }
936    printf("};\n");
937    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
938  } else {
939    // Check the results.
940    VIXL_CHECK(expected_length == results_length);
941    unsigned error_count = 0;
942    unsigned d = 0;
943    for (unsigned n = 0; n < inputs_length; n++, d++) {
944      if (results[d] != expected[d]) {
945        if (++error_count > kErrorReportLimit) continue;
946
947        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
948               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
949               name, rawbits_to_fp(inputs[n]));
950        printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
951               d_bits / 4, static_cast<uint64_t>(expected[d]),
952               static_cast<int64_t>(expected[d]));
953        printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
954               d_bits / 4, static_cast<uint64_t>(results[d]),
955               static_cast<int64_t>(results[d]));
956        printf("\n");
957      }
958    }
959    VIXL_ASSERT(d == expected_length);
960    if (error_count > kErrorReportLimit) {
961      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
962    }
963    VIXL_CHECK(error_count == 0);
964  }
965  delete[] results;
966}
967
968
969// Test FP instructions.
970//  - The inputs[] array should be an array of rawbits representations of
971//    doubles or floats. This ensures that exact bit comparisons can be
972//    performed.
973//  - The expected[] array should be an array of unsigned integers.
974template <typename Tn, typename Td>
975static void TestFPToU(const char * name, TestFPToIntHelper_t helper,
976                      const Tn inputs[], unsigned inputs_length,
977                      const Td expected[], unsigned expected_length) {
978  VIXL_ASSERT(inputs_length > 0);
979
980  const unsigned results_length = inputs_length;
981  Td * results = new Td[results_length];
982
983  const unsigned d_bits = sizeof(Td) * 8;
984  const unsigned n_bits = sizeof(Tn) * 8;
985
986  TestFPToInt_Helper(helper,
987                     reinterpret_cast<uintptr_t>(inputs), inputs_length,
988                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
989
990  if (Test::sim_test_trace()) {
991    // Print the results.
992    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
993    for (unsigned d = 0; d < results_length; d++) {
994      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
995    }
996    printf("};\n");
997    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
998  } else {
999    // Check the results.
1000    VIXL_CHECK(expected_length == results_length);
1001    unsigned error_count = 0;
1002    unsigned d = 0;
1003    for (unsigned n = 0; n < inputs_length; n++, d++) {
1004      if (results[d] != expected[d]) {
1005        if (++error_count > kErrorReportLimit) continue;
1006
1007        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1008               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
1009               name, rawbits_to_fp(inputs[n]));
1010        printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1011               d_bits / 4, static_cast<uint64_t>(expected[d]),
1012               static_cast<uint64_t>(expected[d]));
1013        printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1014               d_bits / 4, static_cast<uint64_t>(results[d]),
1015               static_cast<uint64_t>(results[d]));
1016        printf("\n");
1017      }
1018    }
1019    VIXL_ASSERT(d == expected_length);
1020    if (error_count > kErrorReportLimit) {
1021      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1022    }
1023    VIXL_CHECK(error_count == 0);
1024  }
1025  delete[] results;
1026}
1027
1028
1029// Test FP instructions.
1030//  - The inputs[] array should be an array of rawbits representations of
1031//    doubles or floats. This ensures that exact bit comparisons can be
1032//    performed.
1033//  - The expected[] array should be an array of signed integers.
1034template <typename Tn, typename Td>
1035static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper,
1036                           const Tn inputs[], unsigned inputs_length,
1037                           const Td expected[], unsigned expected_length) {
1038  VIXL_ASSERT(inputs_length > 0);
1039
1040  const unsigned d_bits = sizeof(Td) * 8;
1041  const unsigned n_bits = sizeof(Tn) * 8;
1042
1043  const unsigned results_length = inputs_length * (d_bits + 1);
1044  Td * results = new Td[results_length];
1045
1046  TestFPToFixed_Helper(helper,
1047                       reinterpret_cast<uintptr_t>(inputs), inputs_length,
1048                       reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1049
1050  if (Test::sim_test_trace()) {
1051    // Print the results.
1052    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1053    // There is no simple C++ literal for INT*_MIN that doesn't produce
1054    // warnings, so we use an appropriate constant in that case instead.
1055    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1056    // the like) avoids warnings about comparing values with differing ranges.
1057    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1058    const int64_t int_d_min = -(int_d_max) - 1;
1059    for (unsigned d = 0; d < results_length; d++) {
1060      if (results[d] == int_d_min) {
1061        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1062      } else {
1063        // Some constants (such as those between INT32_MAX and UINT32_MAX)
1064        // trigger compiler warnings. To avoid these warnings, use an
1065        // appropriate macro to make the type explicit.
1066        int64_t result_int64 = static_cast<int64_t>(results[d]);
1067        if (result_int64 >= 0) {
1068          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1069        } else {
1070          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1071        }
1072      }
1073    }
1074    printf("};\n");
1075    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1076  } else {
1077    // Check the results.
1078    VIXL_CHECK(expected_length == results_length);
1079    unsigned error_count = 0;
1080    unsigned d = 0;
1081    for (unsigned n = 0; n < inputs_length; n++) {
1082      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1083        if (results[d] != expected[d]) {
1084          if (++error_count > kErrorReportLimit) continue;
1085
1086          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1087                 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1088                 name, rawbits_to_fp(inputs[n]), fbits);
1089          printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1090                 d_bits / 4, static_cast<uint64_t>(expected[d]),
1091                 static_cast<int64_t>(expected[d]));
1092          printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1093                 d_bits / 4, static_cast<uint64_t>(results[d]),
1094                 static_cast<int64_t>(results[d]));
1095          printf("\n");
1096        }
1097      }
1098    }
1099    VIXL_ASSERT(d == expected_length);
1100    if (error_count > kErrorReportLimit) {
1101      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1102    }
1103    VIXL_CHECK(error_count == 0);
1104  }
1105  delete[] results;
1106}
1107
1108
1109// Test FP instructions.
1110//  - The inputs[] array should be an array of rawbits representations of
1111//    doubles or floats. This ensures that exact bit comparisons can be
1112//    performed.
1113//  - The expected[] array should be an array of unsigned integers.
1114template <typename Tn, typename Td>
1115static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper,
1116                           const Tn inputs[], unsigned inputs_length,
1117                           const Td expected[], unsigned expected_length) {
1118  VIXL_ASSERT(inputs_length > 0);
1119
1120  const unsigned d_bits = sizeof(Td) * 8;
1121  const unsigned n_bits = sizeof(Tn) * 8;
1122
1123  const unsigned results_length = inputs_length * (d_bits + 1);
1124  Td * results = new Td[results_length];
1125
1126  TestFPToFixed_Helper(helper,
1127                       reinterpret_cast<uintptr_t>(inputs), inputs_length,
1128                       reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1129
1130  if (Test::sim_test_trace()) {
1131    // Print the results.
1132    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1133    for (unsigned d = 0; d < results_length; d++) {
1134      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1135    }
1136    printf("};\n");
1137    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1138  } else {
1139    // Check the results.
1140    VIXL_CHECK(expected_length == results_length);
1141    unsigned error_count = 0;
1142    unsigned d = 0;
1143    for (unsigned n = 0; n < inputs_length; n++) {
1144      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1145        if (results[d] != expected[d]) {
1146          if (++error_count > kErrorReportLimit) continue;
1147
1148          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1149                 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1150                 name, rawbits_to_fp(inputs[n]), fbits);
1151          printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1152                 d_bits / 4, static_cast<uint64_t>(expected[d]),
1153                 static_cast<uint64_t>(expected[d]));
1154          printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1155                 d_bits / 4, static_cast<uint64_t>(results[d]),
1156                 static_cast<uint64_t>(results[d]));
1157          printf("\n");
1158        }
1159      }
1160    }
1161    VIXL_ASSERT(d == expected_length);
1162    if (error_count > kErrorReportLimit) {
1163      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1164    }
1165    VIXL_CHECK(error_count == 0);
1166  }
1167  delete[] results;
1168}
1169
1170
1171// ==== Tests for instructions of the form <INST> VReg, VReg. ====
1172
1173
1174static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1175                               uintptr_t inputs_n, unsigned inputs_n_length,
1176                               uintptr_t results,
1177                               VectorFormat vd_form,
1178                               VectorFormat vn_form) {
1179  VIXL_ASSERT(vd_form != kFormatUndefined);
1180  VIXL_ASSERT(vn_form != kFormatUndefined);
1181
1182  SETUP();
1183  START();
1184
1185  // Roll up the loop to keep the code size down.
1186  Label loop_n;
1187
1188  Register out = x0;
1189  Register inputs_n_base = x1;
1190  Register inputs_n_last_16bytes = x3;
1191  Register index_n = x5;
1192
1193  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1194  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1195  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1196
1197  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1198  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1199  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1200  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1201  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1202
1203
1204  // These will be either a D- or a Q-register form, with a single lane
1205  // (for use in scalar load and store operations).
1206  VRegister vd = VRegister(0, vd_bits);
1207  VRegister vn = v1.V16B();
1208  VRegister vntmp = v3.V16B();
1209
1210  // These will have the correct format for use when calling 'helper'.
1211  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1212  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1213
1214  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1215  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1216
1217  __ Mov(out, results);
1218
1219  __ Mov(inputs_n_base, inputs_n);
1220  __ Mov(inputs_n_last_16bytes,
1221         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1222
1223  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1224
1225  __ Mov(index_n, 0);
1226  __ Bind(&loop_n);
1227
1228  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1229                                  vn_lane_bytes_log2));
1230  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1231
1232  // Set the destination to zero.
1233  // TODO: Setting the destination to values other than zero
1234  //       might be a better test for instructions such as sqxtn2
1235  //       which may leave parts of V registers unchanged.
1236  __ Movi(vd.V16B(), 0);
1237
1238  {
1239    SingleEmissionCheckScope guard(&masm);
1240    (masm.*helper)(vd_helper, vn_helper);
1241  }
1242  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1243
1244  __ Add(index_n, index_n, 1);
1245  __ Cmp(index_n, inputs_n_length);
1246  __ B(lo, &loop_n);
1247
1248  END();
1249  RUN();
1250  TEARDOWN();
1251}
1252
1253
1254// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1255// arrays of rawbit representation of input values. This ensures that
1256// exact bit comparisons can be performed.
1257template <typename Td, typename Tn>
1258static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper,
1259                        const Tn inputs_n[], unsigned inputs_n_length,
1260                        const Td expected[], unsigned expected_length,
1261                        VectorFormat vd_form,
1262                        VectorFormat vn_form) {
1263  VIXL_ASSERT(inputs_n_length > 0);
1264
1265  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1266  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1267  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1268
1269  const unsigned results_length = inputs_n_length;
1270  Td* results = new Td[results_length * vd_lane_count];
1271  const unsigned lane_bit = sizeof(Td) * 8;
1272  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;
1273
1274  Test1OpNEON_Helper(helper,
1275                     reinterpret_cast<uintptr_t>(inputs_n),
1276                     inputs_n_length,
1277                     reinterpret_cast<uintptr_t>(results),
1278                     vd_form, vn_form);
1279
1280  if (Test::sim_test_trace()) {
1281    // Print the results.
1282    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1283    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1284      printf(" ");
1285      // Output a separate result for each element of the result vector.
1286      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1287        unsigned index = lane + (iteration * vd_lane_count);
1288        printf(" 0x%0*" PRIx64 ",",
1289               lane_len_in_hex,
1290               static_cast<uint64_t>(results[index]));
1291      }
1292      printf("\n");
1293    }
1294
1295    printf("};\n");
1296    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1297           name,
1298           results_length);
1299  } else {
1300    // Check the results.
1301    VIXL_CHECK(expected_length == results_length);
1302    unsigned error_count = 0;
1303    unsigned d = 0;
1304    const char* padding = "                    ";
1305    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1306    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1307      bool error_in_vector = false;
1308
1309      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1310        unsigned output_index = (n * vd_lane_count) + lane;
1311
1312        if (results[output_index] != expected[output_index]) {
1313          error_in_vector = true;
1314          break;
1315        }
1316      }
1317
1318      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1319        printf("%s\n", name);
1320        printf(" Vn%.*s| Vd%.*s| Expected\n",
1321                lane_len_in_hex+1, padding,
1322                lane_len_in_hex+1, padding);
1323
1324        const unsigned first_index_n =
1325          inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1326
1327        for (unsigned lane = 0;
1328             lane < std::max(vd_lane_count, vn_lane_count);
1329             lane++) {
1330          unsigned output_index = (n * vd_lane_count) + lane;
1331          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1332
1333          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1334                  "| 0x%0*" PRIx64 "\n",
1335                  results[output_index] != expected[output_index] ? '*' : ' ',
1336                  lane_len_in_hex,
1337                  static_cast<uint64_t>(inputs_n[input_index_n]),
1338                  lane_len_in_hex,
1339                  static_cast<uint64_t>(results[output_index]),
1340                  lane_len_in_hex,
1341                  static_cast<uint64_t>(expected[output_index]));
1342        }
1343      }
1344    }
1345    VIXL_ASSERT(d == expected_length);
1346    if (error_count > kErrorReportLimit) {
1347      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1348    }
1349    VIXL_CHECK(error_count == 0);
1350  }
1351  delete[] results;
1352}
1353
1354
1355// ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1356//      where <V> is one of B, H, S or D registers.
1357//      e.g. saddlv H1, v0.8B
1358
1359// TODO: Change tests to store all lanes of the resulting V register.
1360//       Some tests store all 128 bits of the resulting V register to
1361//       check the simulator's behaviour on the rest of the register.
1362//       This is better than storing the affected lanes only.
1363//       Change any tests such as the 'Across' template to do the same.
1364
1365static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1366                                     uintptr_t inputs_n,
1367                                     unsigned inputs_n_length,
1368                                     uintptr_t results,
1369                                     VectorFormat vd_form,
1370                                     VectorFormat vn_form) {
1371  VIXL_ASSERT(vd_form != kFormatUndefined);
1372  VIXL_ASSERT(vn_form != kFormatUndefined);
1373
1374  SETUP();
1375  START();
1376
1377  // Roll up the loop to keep the code size down.
1378  Label loop_n;
1379
1380  Register out = x0;
1381  Register inputs_n_base = x1;
1382  Register inputs_n_last_vector = x3;
1383  Register index_n = x5;
1384
1385  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1386  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1387
1388  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1389  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1390  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1391  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1392  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1393
1394
1395  // These will be either a D- or a Q-register form, with a single lane
1396  // (for use in scalar load and store operations).
1397  VRegister vd = VRegister(0, vd_bits);
1398  VRegister vn = VRegister(1, vn_bits);
1399  VRegister vntmp = VRegister(3, vn_bits);
1400
1401  // These will have the correct format for use when calling 'helper'.
1402  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1403
1404  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1405  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1406
1407  // Same registers for use in the 'ext' instructions.
1408  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1409  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1410
1411  __ Mov(out, results);
1412
1413  __ Mov(inputs_n_base, inputs_n);
1414  __ Mov(inputs_n_last_vector,
1415         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1416
1417  __ Ldr(vn, MemOperand(inputs_n_last_vector));
1418
1419  __ Mov(index_n, 0);
1420  __ Bind(&loop_n);
1421
1422  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1423                                  vn_lane_bytes_log2));
1424  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1425
1426  // Set the destination to zero for tests such as '[r]shrn2'.
1427  // TODO: Setting the destination to values other than zero
1428  //       might be a better test for instructions such as sqxtn2
1429  //       which may leave parts of V registers unchanged.
1430  __ Movi(vd.V16B(), 0);
1431
1432  {
1433    SingleEmissionCheckScope guard(&masm);
1434    (masm.*helper)(vd, vn_helper);
1435  }
1436  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1437
1438  __ Add(index_n, index_n, 1);
1439  __ Cmp(index_n, inputs_n_length);
1440  __ B(lo, &loop_n);
1441
1442  END();
1443  RUN();
1444  TEARDOWN();
1445}
1446
1447// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1448// arrays of rawbit representation of input values. This ensures that
1449// exact bit comparisons can be performed.
1450template <typename Td, typename Tn>
1451static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper,
1452                              const Tn inputs_n[], unsigned inputs_n_length,
1453                              const Td expected[], unsigned expected_length,
1454                              VectorFormat vd_form,
1455                              VectorFormat vn_form) {
1456  VIXL_ASSERT(inputs_n_length > 0);
1457
1458  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1459
1460  const unsigned results_length = inputs_n_length;
1461  Td* results = new Td[results_length * vd_lane_count];
1462  const unsigned lane_bit = sizeof(Td) * 8;
1463  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;
1464
1465  Test1OpAcrossNEON_Helper(helper,
1466                           reinterpret_cast<uintptr_t>(inputs_n),
1467                           inputs_n_length,
1468                           reinterpret_cast<uintptr_t>(results),
1469                           vd_form, vn_form);
1470
1471  if (Test::sim_test_trace()) {
1472    // Print the results.
1473    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1474    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1475      printf(" ");
1476      // Output a separate result for each element of the result vector.
1477      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1478        unsigned index = lane + (iteration * vd_lane_count);
1479        printf(" 0x%0*" PRIx64 ",",
1480               lane_len_in_hex,
1481               static_cast<uint64_t>(results[index]));
1482      }
1483      printf("\n");
1484    }
1485
1486    printf("};\n");
1487    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1488           name,
1489           results_length);
1490  } else {
1491    // Check the results.
1492    VIXL_CHECK(expected_length == results_length);
1493    unsigned error_count = 0;
1494    unsigned d = 0;
1495    const char* padding = "                    ";
1496    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1497    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1498      bool error_in_vector = false;
1499
1500      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1501        unsigned output_index = (n * vd_lane_count) + lane;
1502
1503        if (results[output_index] != expected[output_index]) {
1504          error_in_vector = true;
1505          break;
1506        }
1507      }
1508
1509      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1510        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1511
1512        printf("%s\n", name);
1513        printf(" Vn%.*s| Vd%.*s| Expected\n",
1514                lane_len_in_hex+1, padding,
1515                lane_len_in_hex+1, padding);
1516
1517        // TODO: In case of an error, all tests print out as many elements as
1518        //       there are lanes in the output or input vectors. This way
1519        //       the viewer can read all the values that were needed for the
1520        //       operation but the output contains also unnecessary values.
1521        //       These prints can be improved according to the arguments
1522        //       passed to test functions.
1523        //       This output for the 'Across' category has the required
1524        //       modifications.
1525        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1526          unsigned output_index = n * vd_lane_count;
1527          unsigned input_index_n = (inputs_n_length - vn_lane_count +
1528              n + 1 + lane) % inputs_n_length;
1529
1530          if (vn_lane_count-1 == lane) {  // Is this the last lane?
1531            // Print the result element(s) in the last lane only.
1532            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1533                  "| 0x%0*" PRIx64 "\n",
1534                  results[output_index] != expected[output_index] ? '*' : ' ',
1535                  lane_len_in_hex,
1536                  static_cast<uint64_t>(inputs_n[input_index_n]),
1537                  lane_len_in_hex,
1538                  static_cast<uint64_t>(results[output_index]),
1539                  lane_len_in_hex,
1540                  static_cast<uint64_t>(expected[output_index]));
1541          } else {
1542            printf(" 0x%0*" PRIx64 " |   %.*s|   %.*s\n",
1543                  lane_len_in_hex,
1544                  static_cast<uint64_t>(inputs_n[input_index_n]),
1545                  lane_len_in_hex+1, padding,
1546                  lane_len_in_hex+1, padding);
1547          }
1548        }
1549      }
1550    }
1551    VIXL_ASSERT(d == expected_length);
1552    if (error_count > kErrorReportLimit) {
1553      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1554    }
1555    VIXL_CHECK(error_count == 0);
1556  }
1557  delete[] results;
1558}
1559
1560
1561// ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1562
1563// TODO: Iterate over inputs_d once the traces file is split.
1564
1565static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1566                               uintptr_t inputs_d,
1567                               uintptr_t inputs_n, unsigned inputs_n_length,
1568                               uintptr_t inputs_m, unsigned inputs_m_length,
1569                               uintptr_t results,
1570                               VectorFormat vd_form,
1571                               VectorFormat vn_form,
1572                               VectorFormat vm_form) {
1573  VIXL_ASSERT(vd_form != kFormatUndefined);
1574  VIXL_ASSERT(vn_form != kFormatUndefined);
1575  VIXL_ASSERT(vm_form != kFormatUndefined);
1576
1577  SETUP();
1578  START();
1579
1580  // Roll up the loop to keep the code size down.
1581  Label loop_n, loop_m;
1582
1583  Register out = x0;
1584  Register inputs_n_base = x1;
1585  Register inputs_m_base = x2;
1586  Register inputs_d_base = x3;
1587  Register inputs_n_last_16bytes = x4;
1588  Register inputs_m_last_16bytes = x5;
1589  Register index_n = x6;
1590  Register index_m = x7;
1591
1592  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1593  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1594  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1595
1596  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1597  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1598  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1599  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1600  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1601
1602  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1603  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1604  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1605  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1606  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1607
1608
1609  // Always load and store 128 bits regardless of the format.
1610  VRegister vd = v0.V16B();
1611  VRegister vn = v1.V16B();
1612  VRegister vm = v2.V16B();
1613  VRegister vntmp = v3.V16B();
1614  VRegister vmtmp = v4.V16B();
1615  VRegister vres = v5.V16B();
1616
1617  // These will have the correct format for calling the 'helper'.
1618  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1619  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1620  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1621
1622  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1623  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1624  VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1625
1626  __ Mov(out, results);
1627
1628  __ Mov(inputs_d_base, inputs_d);
1629
1630  __ Mov(inputs_n_base, inputs_n);
1631  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1632  __ Mov(inputs_m_base, inputs_m);
1633  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1634
1635  __ Ldr(vd, MemOperand(inputs_d_base));
1636  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1637  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1638
1639  __ Mov(index_n, 0);
1640  __ Bind(&loop_n);
1641
1642  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1643                                  vn_lane_bytes_log2));
1644  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1645
1646  __ Mov(index_m, 0);
1647  __ Bind(&loop_m);
1648
1649  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1650                                  vm_lane_bytes_log2));
1651  __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1652
1653  __ Mov(vres, vd);
1654  {
1655    SingleEmissionCheckScope guard(&masm);
1656    (masm.*helper)(vres_helper, vn_helper, vm_helper);
1657  }
1658  __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1659
1660  __ Add(index_m, index_m, 1);
1661  __ Cmp(index_m, inputs_m_length);
1662  __ B(lo, &loop_m);
1663
1664  __ Add(index_n, index_n, 1);
1665  __ Cmp(index_n, inputs_n_length);
1666  __ B(lo, &loop_n);
1667
1668  END();
1669  RUN();
1670  TEARDOWN();
1671}
1672
1673
1674// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1675// arrays of rawbit representation of input values. This ensures that
1676// exact bit comparisons can be performed.
1677template <typename Td, typename Tn, typename Tm>
1678static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper,
1679                        const Td inputs_d[],
1680                        const Tn inputs_n[], unsigned inputs_n_length,
1681                        const Tm inputs_m[], unsigned inputs_m_length,
1682                        const Td expected[], unsigned expected_length,
1683                        VectorFormat vd_form,
1684                        VectorFormat vn_form,
1685                        VectorFormat vm_form) {
1686  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
1687
1688  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1689
1690  const unsigned results_length = inputs_n_length * inputs_m_length;
1691  Td* results = new Td[results_length * vd_lane_count];
1692  const unsigned lane_bit = sizeof(Td) * 8;
1693  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tm)) * 8) / 4;
1694
1695  Test2OpNEON_Helper(helper,
1696                     reinterpret_cast<uintptr_t>(inputs_d),
1697                     reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1698                     reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1699                     reinterpret_cast<uintptr_t>(results),
1700                     vd_form, vn_form, vm_form);
1701
1702  if (Test::sim_test_trace()) {
1703    // Print the results.
1704    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1705    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1706      printf(" ");
1707      // Output a separate result for each element of the result vector.
1708      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1709        unsigned index = lane + (iteration * vd_lane_count);
1710        printf(" 0x%0*" PRIx64 ",",
1711               lane_len_in_hex,
1712               static_cast<uint64_t>(results[index]));
1713      }
1714      printf("\n");
1715    }
1716
1717    printf("};\n");
1718    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1719           name,
1720           results_length);
1721  } else {
1722    // Check the results.
1723    VIXL_CHECK(expected_length == results_length);
1724    unsigned error_count = 0;
1725    unsigned d = 0;
1726    const char* padding = "                    ";
1727    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1728    for (unsigned n = 0; n < inputs_n_length; n++) {
1729      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1730        bool error_in_vector = false;
1731
1732        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1733          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1734              (m * vd_lane_count) + lane;
1735
1736          if (results[output_index] != expected[output_index]) {
1737            error_in_vector = true;
1738            break;
1739          }
1740        }
1741
1742        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1743          printf("%s\n", name);
1744          printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
1745                 lane_len_in_hex+1, padding,
1746                 lane_len_in_hex+1, padding,
1747                 lane_len_in_hex+1, padding,
1748                 lane_len_in_hex+1, padding);
1749
1750          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1751            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1752                (m * vd_lane_count) + lane;
1753            unsigned input_index_n = (inputs_n_length - vd_lane_count +
1754                n + 1 + lane) % inputs_n_length;
1755            unsigned input_index_m = (inputs_m_length - vd_lane_count +
1756                m + 1 + lane) % inputs_m_length;
1757
1758            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1759                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1760                   results[output_index] != expected[output_index] ? '*' : ' ',
1761                   lane_len_in_hex,
1762                   static_cast<uint64_t>(inputs_d[lane]),
1763                   lane_len_in_hex,
1764                   static_cast<uint64_t>(inputs_n[input_index_n]),
1765                   lane_len_in_hex,
1766                   static_cast<uint64_t>(inputs_m[input_index_m]),
1767                   lane_len_in_hex,
1768                   static_cast<uint64_t>(results[output_index]),
1769                   lane_len_in_hex,
1770                   static_cast<uint64_t>(expected[output_index]));
1771          }
1772        }
1773      }
1774    }
1775    VIXL_ASSERT(d == expected_length);
1776    if (error_count > kErrorReportLimit) {
1777      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1778    }
1779    VIXL_CHECK(error_count == 0);
1780  }
1781  delete[] results;
1782}
1783
1784
1785// ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
1786
1787static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
1788                                     uintptr_t inputs_d,
1789                                     uintptr_t inputs_n,
1790                                     unsigned inputs_n_length,
1791                                     uintptr_t inputs_m,
1792                                     unsigned inputs_m_length,
1793                                     const int indices[],
1794                                     unsigned indices_length,
1795                                     uintptr_t results,
1796                                     VectorFormat vd_form,
1797                                     VectorFormat vn_form,
1798                                     VectorFormat vm_form) {
1799  VIXL_ASSERT(vd_form != kFormatUndefined);
1800  VIXL_ASSERT(vn_form != kFormatUndefined);
1801  VIXL_ASSERT(vm_form != kFormatUndefined);
1802
1803  SETUP();
1804  START();
1805
1806  // Roll up the loop to keep the code size down.
1807  Label loop_n, loop_m;
1808
1809  Register out = x0;
1810  Register inputs_n_base = x1;
1811  Register inputs_m_base = x2;
1812  Register inputs_d_base = x3;
1813  Register inputs_n_last_16bytes = x4;
1814  Register inputs_m_last_16bytes = x5;
1815  Register index_n = x6;
1816  Register index_m = x7;
1817
1818  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1819  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1820  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1821
1822  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1823  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1824  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1825  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1826  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1827
1828  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1829  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1830  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1831  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1832  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1833
1834
1835  // Always load and store 128 bits regardless of the format.
1836  VRegister vd = v0.V16B();
1837  VRegister vn = v1.V16B();
1838  VRegister vm = v2.V16B();
1839  VRegister vntmp = v3.V16B();
1840  VRegister vmtmp = v4.V16B();
1841  VRegister vres = v5.V16B();
1842
1843  // These will have the correct format for calling the 'helper'.
1844  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1845  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1846  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1847
1848  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1849  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1850  VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1851
1852  __ Mov(out, results);
1853
1854  __ Mov(inputs_d_base, inputs_d);
1855
1856  __ Mov(inputs_n_base, inputs_n);
1857  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1858  __ Mov(inputs_m_base, inputs_m);
1859  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1860
1861  __ Ldr(vd, MemOperand(inputs_d_base));
1862  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1863  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1864
1865  __ Mov(index_n, 0);
1866  __ Bind(&loop_n);
1867
1868  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1869                                  vn_lane_bytes_log2));
1870  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1871
1872  __ Mov(index_m, 0);
1873  __ Bind(&loop_m);
1874
1875  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1876                                  vm_lane_bytes_log2));
1877  __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1878
1879  __ Mov(vres, vd);
1880  {
1881    for (unsigned i = 0; i < indices_length; i++) {
1882      {
1883        SingleEmissionCheckScope guard(&masm);
1884        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
1885      }
1886      __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1887    }
1888  }
1889
1890  __ Add(index_m, index_m, 1);
1891  __ Cmp(index_m, inputs_m_length);
1892  __ B(lo, &loop_m);
1893
1894  __ Add(index_n, index_n, 1);
1895  __ Cmp(index_n, inputs_n_length);
1896  __ B(lo, &loop_n);
1897
1898  END();
1899  RUN();
1900  TEARDOWN();
1901}
1902
1903
1904
1905// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1906// arrays of rawbit representation of input values. This ensures that
1907// exact bit comparisons can be performed.
1908template <typename Td, typename Tn, typename Tm>
1909static void TestByElementNEON(const char *name,
1910                              TestByElementNEONHelper_t helper,
1911                              const Td inputs_d[],
1912                              const Tn inputs_n[], unsigned inputs_n_length,
1913                              const Tm inputs_m[], unsigned inputs_m_length,
1914                              const int indices[], unsigned indices_length,
1915                              const Td expected[], unsigned expected_length,
1916                              VectorFormat vd_form,
1917                              VectorFormat vn_form,
1918                              VectorFormat vm_form) {
1919  VIXL_ASSERT(inputs_n_length > 0);
1920  VIXL_ASSERT(inputs_m_length > 0);
1921  VIXL_ASSERT(indices_length > 0);
1922
1923  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1924
1925  const unsigned results_length = inputs_n_length * inputs_m_length *
1926                                  indices_length;
1927  Td* results = new Td[results_length * vd_lane_count];
1928  const unsigned lane_bit = sizeof(Td) * 8;
1929  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tm)) * 8) / 4;
1930
1931  TestByElementNEON_Helper(helper,
1932    reinterpret_cast<uintptr_t>(inputs_d),
1933    reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1934    reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1935    indices, indices_length,
1936    reinterpret_cast<uintptr_t>(results),
1937    vd_form, vn_form, vm_form);
1938
1939  if (Test::sim_test_trace()) {
1940    // Print the results.
1941    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1942    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1943      printf(" ");
1944      // Output a separate result for each element of the result vector.
1945      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1946        unsigned index = lane + (iteration * vd_lane_count);
1947        printf(" 0x%0*" PRIx64 ",",
1948               lane_len_in_hex,
1949               static_cast<uint64_t>(results[index]));
1950      }
1951      printf("\n");
1952    }
1953
1954    printf("};\n");
1955    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1956           name,
1957           results_length);
1958  } else {
1959    // Check the results.
1960    VIXL_CHECK(expected_length == results_length);
1961    unsigned error_count = 0;
1962    unsigned d = 0;
1963    const char* padding = "                    ";
1964    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1965    for (unsigned n = 0; n < inputs_n_length; n++) {
1966      for (unsigned m = 0; m < inputs_m_length; m++) {
1967        for (unsigned index = 0; index < indices_length; index++, d++) {
1968          bool error_in_vector = false;
1969
1970          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1971            unsigned output_index =
1972                (n * inputs_m_length * indices_length * vd_lane_count) +
1973                (m * indices_length * vd_lane_count) +
1974                (index * vd_lane_count) + lane;
1975
1976            if (results[output_index] != expected[output_index]) {
1977              error_in_vector = true;
1978              break;
1979            }
1980          }
1981
1982          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1983            printf("%s\n", name);
1984            printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
1985                  lane_len_in_hex+1, padding,
1986                  lane_len_in_hex+1, padding,
1987                  lane_len_in_hex+1, padding,
1988                  lane_len_in_hex+1, padding);
1989
1990            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1991              unsigned output_index =
1992                  (n * inputs_m_length * indices_length * vd_lane_count) +
1993                  (m * indices_length * vd_lane_count) +
1994                  (index * vd_lane_count) + lane;
1995              unsigned input_index_n = (inputs_n_length - vd_lane_count +
1996                  n + 1 + lane) % inputs_n_length;
1997              unsigned input_index_m = (inputs_m_length - vd_lane_count +
1998                  m + 1 + lane) % inputs_m_length;
1999
2000              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2001                "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2002                results[output_index] != expected[output_index] ? '*' : ' ',
2003                lane_len_in_hex,
2004                static_cast<uint64_t>(inputs_d[lane]),
2005                lane_len_in_hex,
2006                static_cast<uint64_t>(inputs_n[input_index_n]),
2007                lane_len_in_hex,
2008                static_cast<uint64_t>(inputs_m[input_index_m]),
2009                indices[index],
2010                lane_len_in_hex,
2011                static_cast<uint64_t>(results[output_index]),
2012                lane_len_in_hex,
2013                static_cast<uint64_t>(expected[output_index]));
2014            }
2015          }
2016        }
2017      }
2018    }
2019    VIXL_ASSERT(d == expected_length);
2020    if (error_count > kErrorReportLimit) {
2021      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2022    }
2023    VIXL_CHECK(error_count == 0);
2024  }
2025  delete[] results;
2026}
2027
2028
2029// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2030
2031
2032template <typename Tm>
2033void Test2OpImmNEON_Helper(
2034    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2035    uintptr_t inputs_n,
2036    unsigned inputs_n_length,
2037    const Tm inputs_m[],
2038    unsigned inputs_m_length,
2039    uintptr_t results,
2040    VectorFormat vd_form,
2041    VectorFormat vn_form) {
2042  VIXL_ASSERT(vd_form != kFormatUndefined &&
2043              vn_form != kFormatUndefined);
2044
2045  SETUP();
2046  START();
2047
2048  // Roll up the loop to keep the code size down.
2049  Label loop_n;
2050
2051  Register out = x0;
2052  Register inputs_n_base = x1;
2053  Register inputs_n_last_16bytes = x3;
2054  Register index_n = x5;
2055
2056  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2057  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2058  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2059
2060  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2061  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2062  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2063  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2064  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2065
2066
2067  // These will be either a D- or a Q-register form, with a single lane
2068  // (for use in scalar load and store operations).
2069  VRegister vd = VRegister(0, vd_bits);
2070  VRegister vn = v1.V16B();
2071  VRegister vntmp = v3.V16B();
2072
2073  // These will have the correct format for use when calling 'helper'.
2074  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2075  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2076
2077  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2078  VRegister vntmp_single = VRegister(3, vn_lane_bits);
2079
2080  __ Mov(out, results);
2081
2082  __ Mov(inputs_n_base, inputs_n);
2083  __ Mov(inputs_n_last_16bytes,
2084         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2085
2086  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2087
2088  __ Mov(index_n, 0);
2089  __ Bind(&loop_n);
2090
2091  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2092                                  vn_lane_bytes_log2));
2093  __ Ext(vn, vn, vntmp, vn_lane_bytes);
2094
2095  // Set the destination to zero for tests such as '[r]shrn2'.
2096  // TODO: Setting the destination to values other than zero might be a better
2097  //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2098  __ Movi(vd.V16B(), 0);
2099
2100  {
2101    for (unsigned i = 0; i < inputs_m_length; i++) {
2102      {
2103        SingleEmissionCheckScope guard(&masm);
2104        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2105      }
2106      __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
2107    }
2108  }
2109
2110  __ Add(index_n, index_n, 1);
2111  __ Cmp(index_n, inputs_n_length);
2112  __ B(lo, &loop_n);
2113
2114  END();
2115  RUN();
2116  TEARDOWN();
2117}
2118
2119
2120// Test NEON instructions. The inputs_*[] and expected[] arrays should be
2121// arrays of rawbit representation of input values. This ensures that
2122// exact bit comparisons can be performed.
2123template <typename Td, typename Tn, typename Tm>
2124static void Test2OpImmNEON(
2125    const char * name,
2126    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2127    const Tn inputs_n[], unsigned inputs_n_length,
2128    const Tm inputs_m[], unsigned inputs_m_length,
2129    const Td expected[], unsigned expected_length,
2130    VectorFormat vd_form,
2131    VectorFormat vn_form) {
2132  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2133
2134  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2135  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2136  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2137
2138  const unsigned results_length = inputs_n_length * inputs_m_length;
2139  Td* results = new Td[results_length * vd_lane_count];
2140  const unsigned lane_bit = sizeof(Td) * 8;
2141  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;
2142
2143  Test2OpImmNEON_Helper(helper,
2144                        reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
2145                        inputs_m, inputs_m_length,
2146                        reinterpret_cast<uintptr_t>(results),
2147                        vd_form, vn_form);
2148
2149  if (Test::sim_test_trace()) {
2150    // Print the results.
2151    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2152    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2153      printf(" ");
2154      // Output a separate result for each element of the result vector.
2155      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2156        unsigned index = lane + (iteration * vd_lane_count);
2157        printf(" 0x%0*" PRIx64 ",",
2158               lane_len_in_hex,
2159               static_cast<uint64_t>(results[index]));
2160      }
2161      printf("\n");
2162    }
2163
2164    printf("};\n");
2165    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2166           name,
2167           results_length);
2168  } else {
2169    // Check the results.
2170    VIXL_CHECK(expected_length == results_length);
2171    unsigned error_count = 0;
2172    unsigned d = 0;
2173    const char* padding = "                    ";
2174    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2175    for (unsigned n = 0; n < inputs_n_length; n++) {
2176      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2177        bool error_in_vector = false;
2178
2179        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2180          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2181              (m * vd_lane_count) + lane;
2182
2183          if (results[output_index] != expected[output_index]) {
2184            error_in_vector = true;
2185            break;
2186          }
2187        }
2188
2189        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2190          printf("%s\n", name);
2191          printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2192                 lane_len_in_hex+1, padding,
2193                 lane_len_in_hex, padding,
2194                 lane_len_in_hex+1, padding);
2195
2196        const unsigned first_index_n =
2197          inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2198
2199        for (unsigned lane = 0;
2200             lane < std::max(vd_lane_count, vn_lane_count);
2201             lane++) {
2202            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2203                (m * vd_lane_count) + lane;
2204            unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2205            unsigned input_index_m = m;
2206
2207            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2208                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2209                   results[output_index] != expected[output_index] ? '*' : ' ',
2210                   lane_len_in_hex,
2211                   static_cast<uint64_t>(inputs_n[input_index_n]),
2212                   lane_len_in_hex,
2213                   static_cast<uint64_t>(inputs_m[input_index_m]),
2214                   lane_len_in_hex,
2215                   static_cast<uint64_t>(results[output_index]),
2216                   lane_len_in_hex,
2217                   static_cast<uint64_t>(expected[output_index]));
2218          }
2219        }
2220      }
2221    }
2222    VIXL_ASSERT(d == expected_length);
2223    if (error_count > kErrorReportLimit) {
2224      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2225    }
2226    VIXL_CHECK(error_count == 0);
2227  }
2228  delete[] results;
2229}
2230
2231
2232// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2233
2234
2235static void TestOpImmOpImmNEON_Helper(
2236  TestOpImmOpImmVdUpdateNEONHelper_t helper,
2237  uintptr_t inputs_d,
2238  const int inputs_imm1[], unsigned inputs_imm1_length,
2239  uintptr_t inputs_n, unsigned inputs_n_length,
2240  const int inputs_imm2[], unsigned inputs_imm2_length,
2241  uintptr_t results,
2242  VectorFormat vd_form, VectorFormat vn_form) {
2243  VIXL_ASSERT(vd_form != kFormatUndefined);
2244  VIXL_ASSERT(vn_form != kFormatUndefined);
2245
2246  SETUP();
2247  START();
2248
2249  // Roll up the loop to keep the code size down.
2250  Label loop_n;
2251
2252  Register out = x0;
2253  Register inputs_d_base = x1;
2254  Register inputs_n_base = x2;
2255  Register inputs_n_last_vector = x4;
2256  Register index_n = x6;
2257
2258  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2259  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2260  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2261
2262  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2263  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2264  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2265  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2266  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2267
2268
2269  // These will be either a D- or a Q-register form, with a single lane
2270  // (for use in scalar load and store operations).
2271  VRegister vd = VRegister(0, vd_bits);
2272  VRegister vn = VRegister(1, vn_bits);
2273  VRegister vntmp = VRegister(4, vn_bits);
2274  VRegister vres = VRegister(5, vn_bits);
2275
2276  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2277  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2278
2279  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2280  VRegister vntmp_single = VRegister(4, vn_lane_bits);
2281
2282  // Same registers for use in the 'ext' instructions.
2283  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2284  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2285
2286  __ Mov(out, results);
2287
2288  __ Mov(inputs_d_base, inputs_d);
2289
2290  __ Mov(inputs_n_base, inputs_n);
2291  __ Mov(inputs_n_last_vector,
2292         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2293
2294  __ Ldr(vd, MemOperand(inputs_d_base));
2295
2296  __ Ldr(vn, MemOperand(inputs_n_last_vector));
2297
2298  __ Mov(index_n, 0);
2299  __ Bind(&loop_n);
2300
2301  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2302                                  vn_lane_bytes_log2));
2303  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2304
2305  {
2306    EmissionCheckScope guard(&masm,
2307        kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3);
2308    for (unsigned i = 0; i < inputs_imm1_length; i++) {
2309      for (unsigned j = 0; j < inputs_imm2_length; j++) {
2310        __ Mov(vres, vd);
2311        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2312        __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
2313      }
2314    }
2315  }
2316
2317  __ Add(index_n, index_n, 1);
2318  __ Cmp(index_n, inputs_n_length);
2319  __ B(lo, &loop_n);
2320
2321  END();
2322  RUN();
2323  TEARDOWN();
2324}
2325
2326
2327// Test NEON instructions. The inputs_*[] and expected[] arrays should be
2328// arrays of rawbit representation of input values. This ensures that
2329// exact bit comparisons can be performed.
2330template <typename Td, typename Tn>
2331static void TestOpImmOpImmNEON(const char * name,
2332                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
2333                               const Td inputs_d[],
2334                               const int inputs_imm1[],
2335                               unsigned inputs_imm1_length,
2336                               const Tn inputs_n[],
2337                               unsigned inputs_n_length,
2338                               const int inputs_imm2[],
2339                               unsigned inputs_imm2_length,
2340                               const Td expected[],
2341                               unsigned expected_length,
2342                               VectorFormat vd_form,
2343                               VectorFormat vn_form) {
2344  VIXL_ASSERT(inputs_n_length > 0);
2345  VIXL_ASSERT(inputs_imm1_length > 0);
2346  VIXL_ASSERT(inputs_imm2_length > 0);
2347
2348  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2349
2350  const unsigned results_length = inputs_n_length *
2351      inputs_imm1_length * inputs_imm2_length;
2352
2353  Td* results = new Td[results_length * vd_lane_count];
2354  const unsigned lane_bit = sizeof(Td) * 8;
2355  const unsigned lane_len_in_hex = (std::max(sizeof(Td), sizeof(Tn)) * 8) / 4;
2356
2357  TestOpImmOpImmNEON_Helper(helper,
2358                            reinterpret_cast<uintptr_t>(inputs_d),
2359                            inputs_imm1,
2360                            inputs_imm1_length,
2361                            reinterpret_cast<uintptr_t>(inputs_n),
2362                            inputs_n_length,
2363                            inputs_imm2,
2364                            inputs_imm2_length,
2365                            reinterpret_cast<uintptr_t>(results),
2366                            vd_form, vn_form);
2367
2368  if (Test::sim_test_trace()) {
2369    // Print the results.
2370    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2371    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2372      printf(" ");
2373      // Output a separate result for each element of the result vector.
2374      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2375        unsigned index = lane + (iteration * vd_lane_count);
2376        printf(" 0x%0*" PRIx64 ",",
2377               lane_len_in_hex,
2378               static_cast<uint64_t>(results[index]));
2379      }
2380      printf("\n");
2381    }
2382
2383    printf("};\n");
2384    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2385           name,
2386           results_length);
2387  } else {
2388    // Check the results.
2389    VIXL_CHECK(expected_length == results_length);
2390    unsigned error_count = 0;
2391    unsigned counted_length = 0;
2392    const char* padding = "                    ";
2393    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2394    for (unsigned n = 0; n < inputs_n_length; n++) {
2395      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2396        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2397          bool error_in_vector = false;
2398
2399          counted_length++;
2400
2401          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2402            unsigned output_index =
2403                (n * inputs_imm1_length *
2404                 inputs_imm2_length * vd_lane_count) +
2405                (imm1 * inputs_imm2_length * vd_lane_count) +
2406                (imm2 * vd_lane_count) + lane;
2407
2408            if (results[output_index] != expected[output_index]) {
2409              error_in_vector = true;
2410              break;
2411            }
2412          }
2413
2414          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2415            printf("%s\n", name);
2416            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2417                   lane_len_in_hex+1, padding,
2418                   lane_len_in_hex, padding,
2419                   lane_len_in_hex+1, padding,
2420                   lane_len_in_hex, padding,
2421                   lane_len_in_hex+1, padding);
2422
2423            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2424              unsigned output_index =
2425                (n * inputs_imm1_length *
2426                 inputs_imm2_length * vd_lane_count) +
2427                (imm1 * inputs_imm2_length * vd_lane_count) +
2428                (imm2 * vd_lane_count) + lane;
2429              unsigned input_index_n = (inputs_n_length - vd_lane_count +
2430                  n + 1 + lane) % inputs_n_length;
2431              unsigned input_index_imm1 = imm1;
2432              unsigned input_index_imm2 = imm2;
2433
2434              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2435                "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2436                results[output_index] !=
2437                  expected[output_index] ? '*' : ' ',
2438                lane_len_in_hex,
2439                static_cast<uint64_t>(inputs_d[lane]),
2440                lane_len_in_hex,
2441                static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2442                lane_len_in_hex,
2443                static_cast<uint64_t>(inputs_n[input_index_n]),
2444                lane_len_in_hex,
2445                static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2446                lane_len_in_hex,
2447                static_cast<uint64_t>(results[output_index]),
2448                lane_len_in_hex,
2449                static_cast<uint64_t>(expected[output_index]));
2450            }
2451          }
2452        }
2453      }
2454    }
2455    VIXL_ASSERT(counted_length == expected_length);
2456    if (error_count > kErrorReportLimit) {
2457      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2458    }
2459    VIXL_CHECK(error_count == 0);
2460  }
2461  delete[] results;
2462}
2463
2464
2465// ==== Floating-point tests. ====
2466
2467
// Standard floating-point test expansion for both double- and single-precision
// operations.
//
// STRINGIFY turns the spelling of its argument into a string literal. The
// helper macros below use it to assemble test names.
#define STRINGIFY(token) #token
2471
// Calls the Test{type} helper for one floating-point instruction variant.
//   - The test is named "{mnemonic}_{variant}".
//   - The instruction is passed as &MacroAssembler::{mnemonic}.
//   - `input` must be an array: its element count is computed with sizeof.
//   - Results are checked against kExpected_{mnemonic}_{variant}, whose length
//     is kExpectedCount_{mnemonic}_{variant}.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant), \
             &MacroAssembler::mnemonic, \
             input, \
             sizeof(input) / sizeof(input[0]), \
             kExpected_##mnemonic##_##variant, \
             kExpectedCount_##mnemonic##_##variant)
2478
// Defines a pair of tests for one instruction: {mnemonic}_d, which feeds
// kInputDouble{input_set} to the helper, and {mnemonic}_s, which feeds
// kInputFloat{input_set}. Both go through Test{type}.
#define DEFINE_TEST_FP(mnemonic, type, input_set) \
  TEST(mnemonic##_d) { \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input_set); \
  } \
  TEST(mnemonic##_s) { \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input_set); \
  }
2486
// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
// memory.
// Each DEFINE_TEST_FP line below produces a {mnemonic}_d and a {mnemonic}_s
// test.

// Instructions checked through the Test3Op helper.
DEFINE_TEST_FP(fmadd, 3Op, Basic)
DEFINE_TEST_FP(fmsub, 3Op, Basic)
DEFINE_TEST_FP(fnmadd, 3Op, Basic)
DEFINE_TEST_FP(fnmsub, 3Op, Basic)

// Instructions checked through the Test2Op helper.
DEFINE_TEST_FP(fadd, 2Op, Basic)
DEFINE_TEST_FP(fdiv, 2Op, Basic)
DEFINE_TEST_FP(fmax, 2Op, Basic)
DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP(fmin, 2Op, Basic)
DEFINE_TEST_FP(fminnm, 2Op, Basic)
DEFINE_TEST_FP(fmul, 2Op, Basic)
DEFINE_TEST_FP(fsub, 2Op, Basic)
DEFINE_TEST_FP(fnmul, 2Op, Basic)

// Instructions checked through the Test1Op helper. The frint* tests use the
// "Conversions" input sets rather than the "Basic" ones.
DEFINE_TEST_FP(fabs, 1Op, Basic)
DEFINE_TEST_FP(fmov, 1Op, Basic)
DEFINE_TEST_FP(fneg, 1Op, Basic)
DEFINE_TEST_FP(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frinta, 1Op, Conversions)
DEFINE_TEST_FP(frinti, 1Op, Conversions)
DEFINE_TEST_FP(frintm, 1Op, Conversions)
DEFINE_TEST_FP(frintn, 1Op, Conversions)
DEFINE_TEST_FP(frintp, 1Op, Conversions)
DEFINE_TEST_FP(frintx, 1Op, Conversions)
DEFINE_TEST_FP(frintz, 1Op, Conversions)
2518
2519TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
2520TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
2521TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
2522TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
2523
2524TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
2525TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2526
// Defines four tests for one FP-to-integer instruction, all going through
// Test{type}:
//   {mnemonic}_xd and {mnemonic}_wd use kInputDouble{input_set};
//   {mnemonic}_xs and {mnemonic}_ws use kInputFloat{input_set}.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input_set) \
  TEST(mnemonic##_xd) { \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input_set); \
  } \
  TEST(mnemonic##_xs) { \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_wd) { \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input_set); \
  } \
  TEST(mnemonic##_ws) { \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input_set); \
  }
2540
// Each line expands to four tests: {mnemonic}_xd, _xs, _wd and _ws.
// The fcvtzs/fcvtzu entries go through TestFPToFixedS/TestFPToFixedU; the
// remaining entries go through TestFPToS/TestFPToU.
DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2549
2550// TODO: Scvtf-fixed-point
2551// TODO: Scvtf-integer
2552// TODO: Ucvtf-fixed-point
2553// TODO: Ucvtf-integer
2554
2555// TODO: Fccmp
2556// TODO: Fcsel
2557
2558
2559// ==== NEON Tests. ====
2560
// Calls Test1OpNEON for {mnemonic}. The test is named "{mnemonic}_{vdform}"
// and compared against kExpected_NEON_{mnemonic}_{vdform}. The vector formats
// passed on are kFormat{vdform} and kFormat{vnform}. `input_n` must be an
// array, because its length is computed with sizeof.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
              &MacroAssembler::mnemonic, \
              input_n, \
              (sizeof(input_n) / sizeof(input_n[0])), \
              kExpected_NEON_##mnemonic##_##vdform, \
              kExpectedCount_NEON_##mnemonic##_##vdform, \
              kFormat##vdform, \
              kFormat##vnform)
2572
// Calls Test1OpAcrossNEON for {mnemonic}. Unlike the plain 1Op helper, both
// formats take part in the names: the test is "{mnemonic}_{vdform}_{vnform}"
// and the expected data is kExpected_NEON_{mnemonic}_{vdform}_{vnform}.
// `input_n` must be an array, because its length is computed with sizeof.
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n) \
  Test1OpAcrossNEON( \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(vnform), \
      &MacroAssembler::mnemonic, \
      input_n, \
      (sizeof(input_n) / sizeof(input_n[0])), \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform, \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \
      kFormat##vdform, \
      kFormat##vnform)
2585
// Calls Test2OpNEON for {mnemonic}. The test is named "{mnemonic}_{vdform}"
// and compared against kExpected_NEON_{mnemonic}_{vdform}. `input_d` is passed
// without a length; `input_n` and `input_m` must be arrays, because their
// lengths are computed with sizeof. The three formats are forwarded as
// kFormat{vdform}, kFormat{vnform} and kFormat{vmform}.
#define CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, \
                                  input_d, input_n, input_m) \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
              &MacroAssembler::mnemonic, \
              input_d, \
              input_n, \
              (sizeof(input_n) / sizeof(input_n[0])), \
              input_m, \
              (sizeof(input_m) / sizeof(input_m[0])), \
              kExpected_NEON_##mnemonic##_##vdform, \
              kExpectedCount_NEON_##mnemonic##_##vdform, \
              kFormat##vdform, \
              kFormat##vnform, \
              kFormat##vmform)
2601
// Calls Test2OpImmNEON for {mnemonic}. A "_2OPIMM" suffix is appended to both
// the test name ("{mnemonic}_{vdform}_2OPIMM") and the expected-data symbols
// (kExpected_NEON_{mnemonic}_{vdform}_2OPIMM). `input_n` and `input_m` must be
// arrays, because their lengths are computed with sizeof.
#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, \
                                     input_n, input_m) \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
                 &MacroAssembler::mnemonic, \
                 input_n, \
                 (sizeof(input_n) / sizeof(input_n[0])), \
                 input_m, \
                 (sizeof(input_m) / sizeof(input_m[0])), \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \
                 kFormat##vdform, \
                 kFormat##vnform)
2615
// Calls TestByElementNEON for {mnemonic}. All three formats take part in the
// names: the test is "{mnemonic}_{vdform}_{vnform}_{vmform}" and the expected
// data is kExpected_NEON_{mnemonic}_{vdform}_{vnform}_{vmform}. `input_n`,
// `input_m` and `indices` must be arrays, because their lengths are computed
// with sizeof; `input_d` is passed without a length.
#define CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, \
                                        input_d, input_n, input_m, indices) \
  TestByElementNEON( \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) \
      "_" STRINGIFY(vnform) "_" STRINGIFY(vmform), \
      &MacroAssembler::mnemonic, \
      input_d, \
      input_n, \
      (sizeof(input_n) / sizeof(input_n[0])), \
      input_m, \
      (sizeof(input_m) / sizeof(input_m[0])), \
      indices, \
      (sizeof(indices) / sizeof(indices[0])), \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
      kFormat##vdform, \
      kFormat##vnform, \
      kFormat##vmform)
2634
// Calls TestOpImmOpImmNEON. Here `mnemonic` is used only to build the test
// name ("{mnemonic}_{vdform}") and the expected-data symbols
// (kExpected_NEON_{mnemonic}_{vdform}); the code under test is whatever
// `helper` designates, and `helper` is forwarded unchanged. `input_imm1`,
// `input_n` and `input_imm2` must be arrays, because their lengths are
// computed with sizeof; `input_d` is passed without a length.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, mnemonic, vdform, vnform, \
                                         input_d, input_imm1, \
                                         input_n, input_imm2) \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
                     helper, \
                     input_d, \
                     input_imm1, \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])), \
                     input_n, \
                     (sizeof(input_n) / sizeof(input_n[0])), \
                     input_imm2, \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])), \
                     kExpected_NEON_##mnemonic##_##vdform, \
                     kExpectedCount_NEON_##mnemonic##_##vdform, \
                     kFormat##vdform, \
                     kFormat##vnform)
2653
// One-operand NEON test in which the same format, `form`, is used for both
// the vd and vn format arguments of CALL_TEST_NEON_HELPER_1Op.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, form, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, form, form, input_n)
2658
// Defines the {mnemonic}_8B and {mnemonic}_16B tests; both read
// kInput8bits{input_set}.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input_set) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input_set); \
  }
2666
// Defines the {mnemonic}_4H and {mnemonic}_8H tests; both read
// kInput16bits{input_set}.
#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input_set) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input_set); \
  }
2674
// Defines the {mnemonic}_2S and {mnemonic}_4S tests; both read
// kInput32bits{input_set}.
#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input_set) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input_set); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input_set); \
  }
2682
// Defines the 8B, 16B, 4H and 8H tests for {mnemonic}.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input_set)
2686
// Defines every vector form except 2D for {mnemonic}: 8B, 16B, 4H, 8H, 2S
// and 4S.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input_set)
2690
// Defines the full set of vector forms for {mnemonic}: everything produced by
// DEFINE_TEST_NEON_2SAME_NO2D plus a {mnemonic}_2D test that reads
// kInput64bits{input_set}.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input_set) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input_set); \
  }
// Defines only the 2S, 4S and 2D tests for {mnemonic}, using the integer
// kInput32bits{input_set} / kInput64bits{input_set} data.
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input_set) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input_set); \
  }
2701
// Floating-point counterpart of DEFINE_TEST_NEON_2SAME_SD: the 2S and 4S
// tests read kInputFloat{input_set}, the 2D test reads
// kInputDouble{input_set}.
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input_set) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input_set); \
  }
2712
// Scalar floating-point forms: {mnemonic}_S reads kInputFloat{input_set} and
// {mnemonic}_D reads kInputDouble{input_set}.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input_set) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input_set); \
  }
2720
// Scalar B form: defines {mnemonic}_B, reading kInput8bits{input_set}.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input_set) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input_set); \
  }
// Scalar H form: defines {mnemonic}_H, reading kInput16bits{input_set}.
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input_set) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input_set); \
  }
// Scalar S form: defines {mnemonic}_S, reading kInput32bits{input_set}.
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input_set) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input_set); \
  }
// Scalar D form: defines {mnemonic}_D, reading kInput64bits{input_set}.
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input_set) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input_set); \
  }
2737
// Defines all four integer scalar forms (B, H, S and D) for {mnemonic}.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input_set)
2743
// Defines only the S and D integer scalar forms for {mnemonic}.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input_set)
2747
2748
// Alias for CALL_TEST_NEON_HELPER_1OpAcross; the arguments are forwarded
// unchanged and in the same order.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
2753
// Defines the across-lanes tests whose scalar result uses the same lane size
// letter as the source vector: B from 8B/16B, H from 4H/8H and S from 4S.
// Test names are {mnemonic}_{vd}_{vn}; input data is picked by source lane
// size (kInput8bits / kInput16bits / kInput32bits followed by {input_set}).
#define DEFINE_TEST_NEON_ACROSS(mnemonic, input_set) \
  TEST(mnemonic##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input_set); \
  }
2770
// Defines the across-lanes tests whose scalar result is one size letter up
// from the source lanes: H from 8B/16B, S from 4H/8H and D from 4S.
// Test names are {mnemonic}_{vd}_{vn}; input data is picked by source lane
// size (kInput8bits / kInput16bits / kInput32bits followed by {input_set}).
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input_set) \
  TEST(mnemonic##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input_set); \
  }
2787
// Floating-point across-lanes test. Only the S-from-4S form is defined, as
// {mnemonic}_S_4S, reading kInputFloat{input_set}.
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input_set) \
  TEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input_set); \
  }
2792
// One-operand NEON test in which the vd and vn formats may differ. This is a
// plain alias for CALL_TEST_NEON_HELPER_1Op, so the test name and expected
// data are keyed on {vdform} only.
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
2799
// Defines six tests named after the vd format, with these (vd, vn) pairs:
//   (4H, 8B) (8H, 16B) (2S, 4H) (4S, 8H) (1D, 2S) (2D, 4S)
// Input data is selected by the vn lane size: kInput8bits, kInput16bits or
// kInput32bits, followed by {input_set}.
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input_set) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input_set); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input_set); \
  }
2819
// Defines six tests, three for {mnemonic} and three for {mnemonic}2, with
// these (vd, vn) pairs:
//   {mnemonic}:  (8B, 8H)  (4H, 4S) (2S, 2D)
//   {mnemonic}2: (16B, 8H) (8H, 4S) (4S, 2D)
// Input data is selected by the vn lane size: kInput16bits, kInput32bits or
// kInput64bits, followed by {input_set}.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input_set) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input_set); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input_set); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, \
                                kInput16bits##input_set); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, \
                                kInput32bits##input_set); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, \
                                kInput64bits##input_set); \
  }
2839
// Defines four floating-point tests with these (vd, vn) pairs:
//   {mnemonic}:  (4S, 4H) (2D, 2S)
//   {mnemonic}2: (4S, 8H) (2D, 4S)
// The H-lane sources read kInputFloat16{input_set}; the S-lane sources read
// kInputFloat{input_set}.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input_set) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input_set); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, \
                                kInputFloat16##input_set); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, \
                                kInputFloat##input_set); \
  }
2853
// Defines four floating-point tests with these (vd, vn) pairs:
//   {mnemonic}:  (4H, 4S) (2S, 2D)
//   {mnemonic}2: (8H, 4S) (4S, 2D)
// The S-lane sources read kInputFloat{input_set}; the D-lane sources read
// kInputDouble{input_set}.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input_set) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input_set); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, \
                                kInputFloat##input_set); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, \
                                kInputDouble##input_set); \
  }
2867
// Subset of DEFINE_TEST_NEON_2DIFF_FP_NARROW limited to 2D sources:
// {mnemonic}_2S with (2S, 2D) and {mnemonic}2_4S with (4S, 2D). Both read
// kInputDouble{input_set}.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input_set) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input_set); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, \
                                kInputDouble##input_set); \
  }
2875
// Defines three scalar tests with these (vd, vn) pairs: (B, H), (H, S) and
// (S, D). Input data is selected by the vn size: kInput16bits, kInput32bits
// or kInput64bits, followed by {input_set}.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input_set) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input_set); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input_set); \
  }
2886
// Defines two floating-point tests with a scalar vd format and a two-lane
// vector vn format: {mnemonic}_S with (S, 2S), reading
// kInputFloat{input_set}, and {mnemonic}_D with (D, 2D), reading
// kInputDouble{input_set}.
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input_set) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input_set); \
  }
2894
// Two-operand NEON test in which vd, vn and vm all use the format `variant`,
// and the same array, `input_nm`, supplies both the n and the m operands.
// Note that, unlike the other CALL_* helpers, the expansion is a complete
// braced block that already contains its own terminating semicolon.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  { \
    CALL_TEST_NEON_HELPER_2Op(mnemonic, variant, variant, variant, \
                              input_d, input_nm, input_nm); \
  }
2900
// Defines the {mnemonic}_8B and {mnemonic}_16B three-same tests. Both use
// kInput8bitsAccDestination as the destination input and kInput8bits{input}
// for the n and m operands.
//
// The final line deliberately carries no line-continuation backslash, so the
// macro definition ends here and cannot absorb whatever line follows it.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)                       \
    TEST(mnemonic##_8B) {                                                    \
      CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B,                              \
                                  kInput8bitsAccDestination,                 \
                                  kInput8bits##input);                       \
    }                                                                        \
    TEST(mnemonic##_16B) {                                                   \
      CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B,                             \
                                  kInput8bitsAccDestination,                 \
                                  kInput8bits##input);                       \
    }
2912
// Defines four tests: <mnemonic>_4H, _8H, _2S and _4S. Each forwards to
// CALL_TEST_NEON_HELPER_3SAME. The 4H/8H tests use
// kInput16bitsAccDestination and kInput16bits<input>; the 2S/4S tests use
// kInput32bitsAccDestination and kInput32bits<input>.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 8H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInput32bitsAccDestination, kInput32bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInput32bitsAccDestination, kInput32bits##input); \
  }
2934
// Expands to DEFINE_TEST_NEON_3SAME_8B_16B followed by
// DEFINE_TEST_NEON_3SAME_HS, both with the same (mnemonic, input) pair.
// As the name says, no 2D test is produced here.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
2938
// Expands to everything DEFINE_TEST_NEON_3SAME_NO2D produces, plus one more
// test, <mnemonic>_2D, which forwards to CALL_TEST_NEON_HELPER_3SAME with the
// 2D form, kInput64bitsAccDestination and kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInput64bitsAccDestination, kInput64bits##input); \
  }
2946
// Defines three tests: <mnemonic>_2S, _4S and _2D. Each forwards to
// CALL_TEST_NEON_HELPER_3SAME. The 2S/4S tests use kInputFloatAccDestination
// and kInputFloat<input>; the 2D test uses kInputDoubleAccDestination and
// kInputDouble<input>.
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInputDoubleAccDestination, kInputDouble##input); \
  }
2963
// Defines a single test, <mnemonic>_D, which forwards to
// CALL_TEST_NEON_HELPER_3SAME with the D form, kInput64bitsAccDestination and
// kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
  }
2970
// Defines two tests, <mnemonic>_H and <mnemonic>_S. Both forward to
// CALL_TEST_NEON_HELPER_3SAME: the H test with kInput16bitsAccDestination and
// kInput16bits<input>, the S test with kInput32bitsAccDestination and
// kInput32bits<input>.
//
// The definition ends on the closing brace of the last test. No line
// continuation follows it, so the macro cannot absorb whatever line comes
// next in the file.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
  }
2982
// Defines four tests: <mnemonic>_B, _H, _S and _D. Each forwards to
// CALL_TEST_NEON_HELPER_3SAME with the form named in the test suffix and the
// kInput<8|16|32|64>bitsAccDestination / kInput<8|16|32|64>bits<input> pair of
// the corresponding width.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
  }
3004
// Defines two tests, <mnemonic>_S and <mnemonic>_D. Both forward to
// CALL_TEST_NEON_HELPER_3SAME: the S test with kInputFloatAccDestination and
// kInputFloat<input>, the D test with kInputDoubleAccDestination and
// kInputDouble<input>.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
  }
3016
// Wrapper for three-operand tests whose operands may each have a different
// form. All seven arguments are forwarded unchanged, in order, to
// CALL_TEST_NEON_HELPER_2Op. The expansion is a braced block, so it is valid
// with or without a trailing semicolon.
#define CALL_TEST_NEON_HELPER_3DIFF( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }
3024
// Defines two tests with an 8H vdform, both forwarding to
// CALL_TEST_NEON_HELPER_3DIFF with kInput16bitsAccDestination as input_d and
// kInput8bits<input> for both input_n and input_m:
//   <mnemonic>_8H   runs <mnemonic>  with 8B  vnform/vmform;
//   <mnemonic>2_8H  runs <mnemonic>2 with 16B vnform/vmform.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8B, 8B, \
        kInput16bitsAccDestination, \
        kInput8bits##input, \
        kInput8bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 16B, 16B, \
        kInput16bitsAccDestination, \
        kInput8bits##input, \
        kInput8bits##input); \
  }
3036
// Defines two tests with a 4S vdform, both forwarding to
// CALL_TEST_NEON_HELPER_3DIFF with kInput32bitsAccDestination as input_d and
// kInput16bits<input> for both input_n and input_m:
//   <mnemonic>_4S   runs <mnemonic>  with 4H vnform/vmform;
//   <mnemonic>2_4S  runs <mnemonic>2 with 8H vnform/vmform.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4H, 4H, \
        kInput32bitsAccDestination, \
        kInput16bits##input, \
        kInput16bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 8H, 8H, \
        kInput32bitsAccDestination, \
        kInput16bits##input, \
        kInput16bits##input); \
  }
3048
// Defines two tests with a 2D vdform, both forwarding to
// CALL_TEST_NEON_HELPER_3DIFF with kInput64bitsAccDestination as input_d and
// kInput32bits<input> for both input_n and input_m:
//   <mnemonic>_2D   runs <mnemonic>  with 2S vnform/vmform;
//   <mnemonic>2_2D  runs <mnemonic>2 with 4S vnform/vmform.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2S, 2S, \
        kInput64bitsAccDestination, \
        kInput32bits##input, \
        kInput32bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 4S, 4S, \
        kInput64bitsAccDestination, \
        kInput32bits##input, \
        kInput32bits##input); \
  }
3060
// Expands to DEFINE_TEST_NEON_3DIFF_LONG_4S followed by
// DEFINE_TEST_NEON_3DIFF_LONG_2D for the same (mnemonic, input) pair; the 8H
// tests are not included.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3064
// Expands, in this order, to DEFINE_TEST_NEON_3DIFF_LONG_8H,
// DEFINE_TEST_NEON_3DIFF_LONG_4S and DEFINE_TEST_NEON_3DIFF_LONG_2D for the
// same (mnemonic, input) pair.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3069
// Defines a single test, <mnemonic>_S, which forwards to
// CALL_TEST_NEON_HELPER_3DIFF with vdform S and vnform/vmform H, using
// kInput32bitsAccDestination as input_d and kInput16bits<input> for both
// input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, S, H, H, \
        kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  }
3077
// Defines a single test, <mnemonic>_D, which forwards to
// CALL_TEST_NEON_HELPER_3DIFF with vdform D and vnform/vmform S, using
// kInput64bitsAccDestination as input_d and kInput32bits<input> for both
// input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, D, S, S, \
        kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  }
3085
// Expands to DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S followed by
// DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D for the same (mnemonic, input) pair.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3089
// Defines six tests that forward to CALL_TEST_NEON_HELPER_3DIFF. In every test
// vdform equals vnform, and input_n has the same bit width as the
// AccDestination input while input_m is half that width.
//
//   test name        mnemonic      vdform vnform vmform
//   <mnemonic>_8H    <mnemonic>    8H     8H     8B
//   <mnemonic>_4S    <mnemonic>    4S     4S     4H
//   <mnemonic>_2D    <mnemonic>    2D     2D     2S
//   <mnemonic>2_8H   <mnemonic>2   8H     8H     16B
//   <mnemonic>2_4S   <mnemonic>2   4S     4S     8H
//   <mnemonic>2_2D   <mnemonic>2   2D     2D     4S
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8H, 8B, \
        kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4S, 4H, \
        kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2D, 2S, \
        kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 8H, 16B, \
        kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 4S, 8H, \
        kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 2D, 4S, \
        kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
  }
3121
// Defines six tests that forward to CALL_TEST_NEON_HELPER_3DIFF. In every test
// vnform equals vmform, input_n and input_m are the same identifier, and that
// identifier is twice the bit width of the AccDestination input.
//
//   test name         mnemonic      vdform vnform vmform
//   <mnemonic>_8B     <mnemonic>    8B     8H     8H
//   <mnemonic>_4H     <mnemonic>    4H     4S     4S
//   <mnemonic>_2S     <mnemonic>    2S     2D     2D
//   <mnemonic>2_16B   <mnemonic>2   16B    8H     8H
//   <mnemonic>2_8H    <mnemonic>2   8H     4S     4S
//   <mnemonic>2_4S    <mnemonic>2   4S     2D     2D
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8B, 8H, 8H, \
        kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4H, 4S, 4S, \
        kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2S, 2D, 2D, \
        kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 16B, 8H, 8H, \
        kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 4S, 4S, \
        kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 2D, 2D, \
        kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
  }
3153
// Wrapper for tests that take one register operand and an immediate. All five
// arguments are forwarded unchanged, in order, to
// CALL_TEST_NEON_HELPER_2OpImm. The expansion is a braced block, so it is
// valid with or without a trailing semicolon.
#define CALL_TEST_NEON_HELPER_2OPIMM( \
    mnemonic, vdform, vnform, input_n, input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }
3162
// Defines seven tests named <mnemonic>_<form>_2OPIMM, one for each form in
// 8B, 16B, 4H, 8H, 2S, 4S and 2D. Each forwards to
// CALL_TEST_NEON_HELPER_2OPIMM with that form as both vdform and vnform,
// kInput<N>bits<input> as input_n and kInput<N>bitsImm<input_imm> as
// input_imm, where N is 8 for the B forms, 16 for H, 32 for S and 64 for D.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3206
// Defines seven tests named <mnemonic>_<form>_2OPIMM, one for each vdform in
// 8B, 16B, 4H, 8H, 2S, 4S and 2D. Each forwards to
// CALL_TEST_NEON_HELPER_2OPIMM with the single-letter form of the same
// element size (B, H, S or D) as vnform, kInput<N>bits<input> as input_n and
// kInput<N>bitsImm<input_imm> as input_imm, where N is 8 for B, 16 for H,
// 32 for S and 64 for D.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3250
// Defines six tests that forward to CALL_TEST_NEON_HELPER_2OPIMM. In each one
// input_n is twice the bit width of the immediate list, and the immediate
// list width matches the element size of vdform.
//
//   test name                mnemonic      vdform vnform
//   <mnemonic>_8B_2OPIMM     <mnemonic>    8B     8H
//   <mnemonic>_4H_2OPIMM     <mnemonic>    4H     4S
//   <mnemonic>_2S_2OPIMM     <mnemonic>    2S     2D
//   <mnemonic>2_16B_2OPIMM   <mnemonic>2   16B    8H
//   <mnemonic>2_8H_2OPIMM    <mnemonic>2   8H     4S
//   <mnemonic>2_4S_2OPIMM    <mnemonic>2   4S     2D
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 16B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }
3288
// Defines three tests that forward to CALL_TEST_NEON_HELPER_2OPIMM with
// single-letter forms. In each one input_n is twice the bit width of the
// immediate list.
//
//   test name              vdform vnform input_n              input_imm
//   <mnemonic>_B_2OPIMM    B      H      kInput16bits<input>  kInput8bitsImm<input_imm>
//   <mnemonic>_H_2OPIMM    H      S      kInput32bits<input>  kInput16bitsImm<input_imm>
//   <mnemonic>_S_2OPIMM    S      D      kInput64bits<input>  kInput32bitsImm<input_imm>
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }
3308
// Defines three tests: <mnemonic>_2S_2OPIMM, <mnemonic>_4S_2OPIMM and
// <mnemonic>_2D_2OPIMM. Each forwards to CALL_TEST_NEON_HELPER_2OPIMM with the
// same form for vdform and vnform, and kInputDoubleImm<input_imm> as the
// immediate list for all three.
//
// The 2S test always reads kInputFloatBasic and ignores `input`; only the 4S
// (kInputFloat<input>) and 2D (kInputDouble<input>) tests honour it. This used
// to be written as a token paste of `kInputFloat` and `Basic`, which yields
// the same identifier but reads like a use of the parameter.
// NOTE(review): the fixed input list is kept deliberately. Switching the 2S
// test to kInputFloat##input would presumably invalidate the recorded
// expected results of existing users of this macro -- confirm before changing.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }
3331
// Defines three tests: <mnemonic>_2S_2OPIMM, <mnemonic>_4S_2OPIMM and
// <mnemonic>_2D_2OPIMM. Each forwards to CALL_TEST_NEON_HELPER_2OPIMM with the
// same form for vdform and vnform. The 2S and 4S tests use
// kInput32bitsImm<input_imm> as the immediate list; the 2D test uses
// kInput64bitsImm<input_imm>.
//
// The 2S test always reads kInputFloatBasic and ignores `input`; only the 4S
// (kInputFloat<input>) and 2D (kInputDouble<input>) tests honour it. This used
// to be written as a token paste of `kInputFloat` and `Basic`, which yields
// the same identifier but reads like a use of the parameter.
// NOTE(review): the fixed input list is kept deliberately. Switching the 2S
// test to kInputFloat##input would presumably invalidate the recorded
// expected results of existing users of this macro -- confirm before changing.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInput64bitsImm##input_imm); \
  }
3354
// Defines the scalar floating-point "two operands + immediate" tests for
// `mnemonic`: TEST(<mnemonic>_S_2OPIMM) and TEST(<mnemonic>_D_2OPIMM).
//
//   mnemonic   Instruction name; pasted into each test name and forwarded to
//              CALL_TEST_NEON_HELPER_2OPIMM.
//   input      Suffix pasted onto kInputDouble to select the D-test data.
//   input_imm  Suffix pasted onto kInput32bitsImm (S) / kInput64bitsImm (D)
//              to select the immediates.
//
// NOTE(review): the S test pastes the literal `Basic` (kInputFloatBasic) and
// ignores `input`. Presumably deliberate - confirm before changing it.
//
// The helper invocations carry no trailing `;`, so this relies on
// CALL_TEST_NEON_HELPER_2OPIMM (defined elsewhere in this file) expanding to
// a complete statement or block - TODO confirm.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, S, \
                                 kInputFloat##Basic, \
                                 kInput32bitsImm##input_imm) \
  } \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 D, D, \
                                 kInputDouble##input, \
                                 kInput64bitsImm##input_imm) \
  }
3370
// Defines the integer 2OPIMM tests restricted to the S- and D-sized vector
// arrangements: TEST(<mnemonic>_2S_2OPIMM), TEST(<mnemonic>_4S_2OPIMM) and
// TEST(<mnemonic>_2D_2OPIMM).
//
//   mnemonic   Instruction name; pasted into each test name and forwarded to
//              CALL_TEST_NEON_HELPER_2OPIMM.
//   input      Suffix pasted onto kInput32bits (2S, 4S) / kInput64bits (2D).
//   input_imm  Suffix pasted onto kInput32bitsImm (2S, 4S) / kInput64bitsImm
//              (2D).
#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, \
        kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, \
        kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, \
        kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3390
// Defines TEST(<mnemonic>_D_2OPIMM): the D-form scalar integer 2OPIMM test,
// fed with kInput64bits<input> data and kInput64bitsImm<input_imm> immediates.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, \
        kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3398
// Defines TEST(<mnemonic>_S_2OPIMM) (kInput32bits<input> data with
// kInput32bitsImm<input_imm> immediates), then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D with the same arguments to add the D-form
// test.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, \
        kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
3407
// Defines TEST(<mnemonic>_D_2OPIMM): the D-form scalar floating-point 2OPIMM
// test, fed with kInputDouble<input> data and kInputDoubleImm<input_imm>
// immediates.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, \
        kInputDouble##input, kInputDoubleImm##input_imm); \
  }
3415
// Defines TEST(<mnemonic>_S_2OPIMM) using kInputFloat<input> data, then
// expands DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D with the same arguments to add
// the D-form test.
//
// Both the S and the D test take their immediates from
// kInputDoubleImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, \
        kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
3424
// Defines the scalar integer 2OPIMM tests for every element size:
// TEST(<mnemonic>_B_2OPIMM) and TEST(<mnemonic>_H_2OPIMM) are written out
// here; the S and D tests come from expanding
// DEFINE_TEST_NEON_2OPIMM_SCALAR_SD with the same arguments.
//
//   input      Suffix pasted onto kInput8bits / kInput16bits.
//   input_imm  Suffix pasted onto kInput8bitsImm / kInput16bitsImm.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, B, \
        kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, H, \
        kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
3439
// Defines the 2OPIMM tests whose two vector forms differ in element size
// (8H/8B, 4S/4H, 2D/2S), for both `mnemonic` and its `<mnemonic>2` variant
// (8H/16B, 4S/8H, 2D/4S). Six tests are produced:
//   <mnemonic>_8H_2OPIMM,  <mnemonic>_4S_2OPIMM,  <mnemonic>_2D_2OPIMM,
//   <mnemonic>2_8H_2OPIMM, <mnemonic>2_4S_2OPIMM, <mnemonic>2_2D_2OPIMM.
//
// The data and immediate tables are chosen by the element size of the second
// form in each pair: kInput8bits / kInput16bits / kInput32bits, with the
// matching kInput<N>bitsImm table.
// NOTE(review): presumably the first form is the destination and the second
// the source - confirm against CALL_TEST_NEON_HELPER_2OPIMM.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8B, \
        kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4H, \
        kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2S, \
        kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 16B, \
        kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 8H, \
        kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 2D, 4S, \
        kInput32bits##input, kInput32bitsImm##input_imm); \
  }
3477
// Upper-case front end for CALL_TEST_NEON_HELPER_ByElement: forwards all
// eight arguments unchanged, in the same order, and wraps the call in its own
// brace block.
#define CALL_TEST_NEON_HELPER_BYELEMENT( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices); \
  }
3487
// Defines the integer by-element tests for `mnemonic`:
//   <mnemonic>_4H_4H_H, <mnemonic>_8H_8H_H   (16-bit data, kInputHIndices)
//   <mnemonic>_2S_2S_S, <mnemonic>_4S_4S_S   (32-bit data, kInputSIndices)
//
// Each test name encodes the vd, vn and vm forms handed to
// CALL_TEST_NEON_HELPER_BYELEMENT.
//
//   input_d / input_n / input_m  Suffixes pasted onto kInput16bits or
//                                kInput32bits to select the data for the
//                                corresponding operand.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4H, 4H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 8H, 8H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
3521
// Defines the scalar integer by-element tests for `mnemonic`:
//   <mnemonic>_H_H_H   (16-bit data, kInputHIndices)
//   <mnemonic>_S_S_S   (32-bit data, kInputSIndices)
//
//   input_d / input_n / input_m  Suffixes pasted onto kInput16bits or
//                                kInput32bits to select the data for the
//                                corresponding operand.
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, H, H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
3540
// Defines the vector floating-point by-element tests for `mnemonic`:
//   <mnemonic>_2S_2S_S, <mnemonic>_4S_4S_S   (kInputFloat data, kInputSIndices)
//   <mnemonic>_2D_2D_D                       (kInputDouble data, kInputDIndices)
//
// Each test name encodes the vd, vn and vm forms handed to
// CALL_TEST_NEON_HELPER_BYELEMENT.
//
//   input_d / input_n / input_m  Suffixes pasted onto kInputFloat or
//                                kInputDouble to select the data for the
//                                corresponding operand.
//
// The last line deliberately has no line continuation, so the definition ends
// at the closing brace and cannot absorb whatever follows it.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2S, 2S, S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, 4S, S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2D, 2D, D, \
                                    kInputDouble##input_d, \
                                    kInputDouble##input_n, \
                                    kInputDouble##input_m, \
                                    kInputDIndices); \
  }
3566
// Defines the scalar floating-point by-element tests for `mnemonic`:
//   <mnemonic>_S_S_S   (kInputFloat data, kInputSIndices)
//   <mnemonic>_D_D_D   (kInputDouble data, kInputDIndices)
//
//   inp_d / inp_n / inp_m  Suffixes pasted onto kInputFloat or kInputDouble
//                          to select the data for the corresponding operand.
//
// The last line deliberately has no line continuation, so the definition ends
// at the closing brace and cannot absorb whatever follows it.
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    S, S, S, \
                                    kInputFloat##inp_d, \
                                    kInputFloat##inp_n, \
                                    kInputFloat##inp_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_D_D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    D, D, D, \
                                    kInputDouble##inp_d, \
                                    kInputDouble##inp_n, \
                                    kInputDouble##inp_m, \
                                    kInputDIndices); \
  }
3584
3585
// Defines the integer by-element tests in which the vd form is wider than the
// vn/vm forms, for both `mnemonic` and its `<mnemonic>2` variant:
//   <mnemonic>_4S_4H_H, <mnemonic>2_4S_8H_H   (kInputHIndices)
//   <mnemonic>_2D_2S_S, <mnemonic>2_2D_4S_S   (kInputSIndices)
//
//   input_d           Suffix pasted onto kInput32bits (4S) / kInput64bits (2D).
//   input_n, input_m  Suffixes pasted onto kInput16bits (H) / kInput32bits (S).
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4H, H, \
        kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##2_4S_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic##2, 4S, 8H, H, \
        kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_2D_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2D, 2S, S, \
        kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  } \
  TEST(mnemonic##2_2D_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic##2, 2D, 4S, S, \
        kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
3619
// Defines the scalar integer by-element tests in which the vd form is wider
// than the vn/vm forms:
//   <mnemonic>_S_H_H   (kInput32bits vd data, kInput16bits vn/vm data,
//                       kInputHIndices)
//   <mnemonic>_D_S_S   (kInput64bits vd data, kInput32bits vn/vm data,
//                       kInputSIndices)
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \
    mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_S_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, H, H, \
        kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_D_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, D, S, S, \
        kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
3638
3639
// Adapter onto CALL_TEST_NEON_HELPER_OpImmOpImm for instructions that take two
// operand/immediate pairs. It passes the address of the MacroAssembler member
// named `mnemonic`, then `mnemonic` itself, then `variant` twice, followed by
// the two (data, immediate) pairs. The call is wrapped in its own brace block.
#define CALL_TEST_NEON_HELPER_2OP2IMM( \
    mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
  { \
    CALL_TEST_NEON_HELPER_OpImmOpImm( \
        &MacroAssembler::mnemonic, mnemonic, \
        variant, variant, \
        input_d, input_imm1, \
        input_n, input_imm2); \
  }
3652
// Defines one "two operands, two immediates" test per element size for
// `mnemonic`:
//   <mnemonic>_B (variant 16B), <mnemonic>_H (variant 8H),
//   <mnemonic>_S (variant 4S),  <mnemonic>_D (variant 2D).
//
//   input_d, input_n        Suffixes pasted onto kInput<N>bits to select the
//                           data for each operand.
//   input_imm1, input_imm2  Suffixes pasted onto kInput<N>bitsImm to select
//                           the immediates paired with input_d and input_n.
#define DEFINE_TEST_NEON_2OP2IMM( \
    mnemonic, input_d, input_imm1, input_n, input_imm2) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 16B, \
        kInput8bits##input_d, kInput8bitsImm##input_imm1, \
        kInput8bits##input_n, kInput8bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 8H, \
        kInput16bits##input_d, kInput16bitsImm##input_imm1, \
        kInput16bits##input_n, kInput16bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 4S, \
        kInput32bits##input_d, kInput32bitsImm##input_imm1, \
        kInput32bits##input_n, kInput32bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 2D, \
        kInput64bits##input_d, kInput64bitsImm##input_imm1, \
        kInput64bits##input_n, kInput64bitsImm##input_imm2); \
  }
3688
3689
// Advanced SIMD copy.
// DEFINE_TEST_NEON_2OP2IMM expands to TEST(ins_B), TEST(ins_H), TEST(ins_S)
// and TEST(ins_D). `Basic` selects the data table and `LaneCountFromZero` the
// immediate table for each of the two operand/immediate pairs.
DEFINE_TEST_NEON_2OP2IMM(ins,
                         Basic, LaneCountFromZero,
                         Basic, LaneCountFromZero)
// DEFINE_TEST_NEON_2OPIMM_COPY is defined earlier in this file, outside this
// section. NOTE(review): presumably it covers the vector arrangements of
// `dup` - confirm against its definition.
DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
3695
3696
// Advanced SIMD scalar copy.
// Expands to TEST(dup_B_2OPIMM), TEST(dup_H_2OPIMM), TEST(dup_S_2OPIMM) and
// TEST(dup_D_2OPIMM), pairing kInput<N>bitsBasic data with
// kInput<N>bitsImmLaneCountFromZero immediates for N = 8, 16, 32 and 64.
DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
3699
3700
// Advanced SIMD three same.
// One instantiation per instruction; every line passes `Basic` as the input
// selector. The DEFINE_TEST_NEON_3SAME* macros are defined earlier in this
// file, outside this section.
// NOTE(review): the macro suffixes (_NO2D, _HS, _FP, _8B_16B) presumably name
// the arrangements or element types each family covers - confirm against the
// macro definitions.
// NOTE(review): `and_` is spelled with a trailing underscore, presumably
// because `and` is an alternative operator token in C++ and cannot be used as
// an identifier.
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
3779
3780
// Advanced SIMD scalar three same.
// One instantiation per instruction; every line passes `Basic` as the input
// selector. The DEFINE_TEST_NEON_3SAME_*SCALAR* macros are defined earlier in
// this file, outside this section.
// NOTE(review): the suffixes (_SCALAR, _SCALAR_D, _SCALAR_HS, _FP_SCALAR)
// presumably name the scalar element sizes each family covers - confirm
// against the macro definitions.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
3813
3814
3815// Advanced SIMD three different.
// Each entry passes an instruction mnemonic and the "Basic" input-set suffix.
// The macro variant follows the instruction form used in this list: _LONG for
// the "l" (lengthening) mnemonics, _WIDE for the "w" mnemonics and _NARROW
// for the "hn" mnemonics. The sqdml*/sqdmull entries use _LONG_SD and pmull
// uses _LONG_8H.
// NOTE(review): _LONG_SD and _LONG_8H presumably restrict the tested
// arrangements -- confirm against the macro definitions earlier in the file.
3816DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
3817DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
3818DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
3819DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
3820DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
3821DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
3822DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
3823DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
3824DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
3825DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
3826DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
3827DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
3828DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
3829DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
3830DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
3831DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
3832DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
3833DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
3834DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
3835DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
3836DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
3837DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
3838DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
3839DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
3840DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
3841DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
3842
3843
3844// Advanced SIMD scalar three different.
// Only the three saturating doubling multiply forms are registered here, all
// through the same _SCALAR_LONG_SD macro with the "Basic" input-set suffix.
3845DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
3846DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
3847DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
3848
3849
3850// Advanced SIMD scalar pairwise.
// Scalar ADDP is written out as an explicit TEST() that invokes
// CALL_TEST_NEON_HELPER_2DIFF directly, pairing the D and 2D formats with the
// kInput64bitsBasic inputs, instead of going through a DEFINE_TEST_NEON_*
// macro like the floating-point pairwise entries that follow.
// NOTE(review): presumably no DEFINE_TEST_NEON_* macro covers this single
// D/2D combination -- confirm against the macro definitions.
3851TEST(addp_SCALAR) {
3852  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
3853}
// Floating-point scalar pairwise instructions, all registered through the
// same macro with the "Basic" input-set suffix.
3854DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
3855DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
3856DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
3857DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
3858DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
3859
3860
3861// Advanced SIMD shift by immediate.
// Arguments are (mnemonic, input-set suffix, immediate-set suffix). In this
// list the right-shift forms (including the narrowing ones) use the TypeWidth
// immediates, the left-shift and lengthening forms use TypeWidthFromZero, and
// the fixed-point conversions use TypeWidthFromZeroToWidth.
// NOTE(review): the immediate values behind each suffix are defined outside
// this section -- confirm them there before adding new entries.
3862DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
3863DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
3864DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
3865DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
3866DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
3867DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
3868DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
3869DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
3870DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
3871DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
3872DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
3873DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \
3874                           TypeWidthFromZeroToWidth)
3875DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3876DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
3877DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
3878DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
3879DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
3880DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
3881DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
3882DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
3883DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
3884DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
3885DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
3886DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
3887DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
3888DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
3889DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \
3890                           TypeWidthFromZeroToWidth)
3891DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3892
3893
3894// Advanced SIMD scalar shift by immediate.
// Arguments are (mnemonic, input-set suffix, immediate-set suffix). The
// pairing of instruction form and immediate set is the same as in the vector
// list: TypeWidth for right shifts (including narrowing), TypeWidthFromZero
// for left shifts, and TypeWidthFromZeroToWidth for fixed-point conversions.
// The non-saturating shifts go through the _SCALAR_D macro variant, while the
// saturating left shifts (sqshl, sqshlu, uqshl) use the unsuffixed _SCALAR
// variant.
// NOTE(review): presumably _SCALAR_D limits the test to D registers --
// confirm against the macro definitions earlier in the file.
3895DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
3896DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
3897DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
3898DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
3899DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
3900DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
3901DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
3902DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
3903DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \
3904                                  TypeWidthFromZeroToWidth)
3905DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3906DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
3907DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
3908DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
3909DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
3910DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
3911DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
3912DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
3913DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
3914DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
3915DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
3916DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
3917DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
3918DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \
3919                                  TypeWidthFromZeroToWidth)
3920DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3921
3922
3923// Advanced SIMD two-register miscellaneous.
// Most entries pass (mnemonic, input-set suffix) to a 2SAME or 2DIFF macro
// variant. The compare-against-zero instructions are instead registered
// through the 2OPIMM macros with the "Zero" immediate-set suffix (the
// floating-point ones through 2OPIMM_FCMP_ZERO), and shll uses 2OPIMM_LONG
// with the "SHLL" immediate-set suffix. Conversion and rounding instructions
// use the "Conversions" input-set suffix; everything else uses "Basic".
// The interleaved comments record instruction forms that have no entry of
// their own in this list.
3924DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
3925DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
3926DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
3927DEFINE_TEST_NEON_2SAME(suqadd, Basic)
3928DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
3929DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
3930DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
3931DEFINE_TEST_NEON_2SAME(sqabs, Basic)
3932DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
3933DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
3934DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
3935DEFINE_TEST_NEON_2SAME(abs, Basic)
3936DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
3937DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
3938DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
3939DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
3940DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
3941DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
3942DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
3943DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
3944DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
3945// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
3946DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
3947DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
3948DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
3949DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
3950DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
3951DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
3952DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
3953// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
3954DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
3955DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
3956DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
3957DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
3958DEFINE_TEST_NEON_2SAME(usqadd, Basic)
3959DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
3960DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
3961DEFINE_TEST_NEON_2SAME(sqneg, Basic)
3962DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
3963DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
3964DEFINE_TEST_NEON_2SAME(neg, Basic)
3965DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
3966DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
3967DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
3968DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
3969DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
3970DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
3971DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
3972DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
3973DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
3974// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
3975DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
3976DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
3977DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
3978DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
3979DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
3980DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
3981DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
3982// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
3983DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
3984DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
3985DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
3986
3987
3988// Advanced SIMD scalar two-register miscellaneous.
// Scalar counterparts of the two-register miscellaneous tests. As in the
// vector list, compare-against-zero instructions are registered through the
// 2OPIMM scalar macros with the "Zero" immediate-set suffix, and conversion
// instructions use the "Conversions" input-set suffix.
// The interleaved comments record instruction forms that have no entry of
// their own in this list.
3989DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
3990DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
3991DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
3992DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
3993DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
3994DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
3995DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
3996DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
3997DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
3998DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
3999// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4000DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
4001DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
4002DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
4003DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
4004// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4005DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
4006DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
4007DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
4008DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
4009DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
4010DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
4011DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
4012DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
4013DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// Scalar FCVTXN is written out as an explicit TEST() that invokes
// CALL_TEST_NEON_HELPER_2DIFF directly, pairing the S and D formats with the
// kInputDoubleConversions inputs, instead of going through a
// DEFINE_TEST_NEON_* macro like the neighbouring entries.
// NOTE(review): presumably no DEFINE_TEST_NEON_* macro covers this single
// S/D combination -- confirm against the macro definitions.
4014TEST(fcvtxn_SCALAR) {
4015  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
4016}
// Remaining scalar two-register miscellaneous floating-point entries:
// unsigned conversions with the "Conversions" input-set suffix, the
// compare-against-zero forms with the "Zero" immediate-set suffix, and
// frsqrte with "Basic".
4017DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
4018DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
4019DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
4020// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4021DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
4022DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
4023DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
4024// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4025DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
4026
4027
4028// Advanced SIMD across lanes.
// Three macro variants are used in this list: _ACROSS_LONG for the
// lengthening sums (saddlv, uaddlv), _ACROSS for the integer min/max/add
// reductions, and _ACROSS_FP for the floating-point reductions. All entries
// use the "Basic" input-set suffix.
4029DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
4030DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
4031DEFINE_TEST_NEON_ACROSS(sminv, Basic)
4032DEFINE_TEST_NEON_ACROSS(addv, Basic)
4033DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
4034DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
4035DEFINE_TEST_NEON_ACROSS(uminv, Basic)
4036DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
4037DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
4038DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
4039DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4040
4041
4042// Advanced SIMD permute.
// The permute instructions take two source vectors of the same arrangement,
// so they are registered with the generic DEFINE_TEST_NEON_3SAME macro and
// the "Basic" input-set suffix.
4043DEFINE_TEST_NEON_3SAME(uzp1, Basic)
4044DEFINE_TEST_NEON_3SAME(trn1, Basic)
4045DEFINE_TEST_NEON_3SAME(zip1, Basic)
4046DEFINE_TEST_NEON_3SAME(uzp2, Basic)
4047DEFINE_TEST_NEON_3SAME(trn2, Basic)
4048DEFINE_TEST_NEON_3SAME(zip2, Basic)
4049
4050
4051// Advanced SIMD vector x indexed element.
// Each entry passes a mnemonic followed by three input-set suffixes, all
// "Basic" here. The lengthening ("l") mnemonics use the _BYELEMENT_DIFF
// variant, the floating-point ones use _FP_BYELEMENT, and the remaining
// integer ones use plain _BYELEMENT.
// NOTE(review): presumably the three suffixes select the inputs for the
// destination, the vector operand and the indexed-element operand -- confirm
// against the macro definitions earlier in the file.
4052DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
4053DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
4054DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
4055DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
4056DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
4057DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
4058DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
4059DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
4060DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
4061DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
4062DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
4063DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
4064DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
4065DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
4066DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
4067DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
4068DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
4069DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4070
4071
4072// Advanced SIMD scalar x indexed element.
// Scalar counterparts of the by-element tests, using the *_SCALAR macro
// variants with the same argument layout: a mnemonic followed by three
// input-set suffixes, all "Basic" here. Only saturating doubling multiplies
// and floating-point multiplies appear in this list.
4073DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
4074DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
4075DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
4076DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
4077DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
4078DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
4079DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
4080DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
4081DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
4082
4083}  // namespace vixl
4084