1// Copyright 2015, ARM Limited
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <stdio.h>
28#include <float.h>
29
30#include "test-runner.h"
31#include "test-utils-a64.h"
32#include "test-simulator-inputs-a64.h"
33#include "test-simulator-traces-a64.h"
34#include "vixl/a64/macro-assembler-a64.h"
35#include "vixl/a64/simulator-a64.h"
36
37namespace vixl {
38
39// ==== Simulator Tests ====
40//
41// These simulator tests check instruction behaviour against a trace taken from
42// real AArch64 hardware. The same test code is used to generate the trace; the
43// results are printed to stdout when the test is run with --sim_test_trace.
44//
45// The input lists and expected results are stored in test/traces. The expected
46// results can be regenerated using tools/generate_simulator_traces.py. Adding
47// a test for a new instruction is described at the top of
48// test-simulator-traces-a64.h.
49
// `__ Foo(...)` is shorthand for `masm.Foo(...)`. It only works where a
// MacroAssembler named `masm` is in scope, as declared by SETUP().
#define __ masm.

// Expands to TEST_ with SIM_ prepended to the test name.
#define TEST(name) TEST_(SIM_##name)

// Size argument handed to the MacroAssembler constructor by SETUP().
#define BUF_SIZE (256)
54
// Per-test scaffolding. The test dispatchers in this file use these macros in
// the order SETUP, START, <code generation>, END, RUN, TEARDOWN. When
// VIXL_INCLUDE_SIMULATOR is defined the generated code is executed by a
// Simulator; otherwise it is called directly as a native function.
#ifdef VIXL_INCLUDE_SIMULATOR

// Declares the assembler and decoder, heap-allocates either a Debugger or a
// plain Simulator according to Test::run_debugger(), and forwards the
// coloured-trace and instruction-statistics settings to it.
#define SETUP() \
  MacroAssembler masm(BUF_SIZE); \
  Decoder decoder; \
  Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder) \
                                              : new Simulator(&decoder); \
  simulator->set_coloured_trace(Test::coloured_trace()); \
  simulator->set_instruction_stats(Test::instruction_stats());

// Resets the assembler and simulator state, saves the callee-saved registers
// and switches on whichever tracing / instrumentation options were requested.
#define START() \
  masm.Reset(); \
  simulator->ResetState(); \
  masm.PushCalleeSavedRegisters(); \
  if (Test::trace_reg()) { \
    masm.Trace(LOG_STATE, TRACE_ENABLE); \
  } \
  if (Test::trace_write()) { \
    masm.Trace(LOG_WRITE, TRACE_ENABLE); \
  } \
  if (Test::trace_sim()) { \
    masm.Trace(LOG_DISASM, TRACE_ENABLE); \
  } \
  if (Test::instruction_stats()) { \
    masm.EnableInstrumentation(); \
  }

// Undoes START(): stops instrumentation and tracing, restores the callee-saved
// registers, emits the return and finalizes the generated code.
#define END() \
  if (Test::instruction_stats()) { \
    masm.DisableInstrumentation(); \
  } \
  masm.Trace(LOG_ALL, TRACE_DISABLE); \
  masm.PopCalleeSavedRegisters(); \
  masm.Ret(); \
  masm.FinalizeCode()

// Executes the generated code on the simulator, from the start of the buffer.
#define RUN() \
  simulator->RunFrom(masm.GetStartAddress<Instruction*>())

// Releases the simulator allocated by SETUP().
#define TEARDOWN() \
  delete simulator;

#else  // VIXL_INCLUDE_SIMULATOR

// Declares the assembler and performs the CPU set-up.
#define SETUP() \
  MacroAssembler masm(BUF_SIZE); \
  CPU::SetUp()

// Resets the assembler and saves the callee-saved registers.
#define START() \
  masm.Reset(); \
  masm.PushCalleeSavedRegisters()

// Restores the callee-saved registers, emits the return and finalizes the
// generated code.
#define END() \
  masm.PopCalleeSavedRegisters(); \
  masm.Ret(); \
  masm.FinalizeCode()

// Calls the generated code as a `void (*)(void)` function. The buffer address
// is copied into the function pointer with memcpy (after checking that both
// pointers have the same size) instead of being cast, and the instruction and
// data caches are made coherent over the generated range before the call.
#define RUN() \
  { \
    byte* code_start = masm.GetStartAddress<byte*>(); \
    size_t code_size = masm.CursorOffset(); \
    void (*entry_point)(void); \
    \
    CPU::EnsureIAndDCacheCoherency(code_start, code_size); \
    VIXL_STATIC_ASSERT(sizeof(code_start) == sizeof(entry_point)); \
    memcpy(&entry_point, &code_start, sizeof(code_start)); \
    entry_point(); \
  }

// Nothing to release when running natively.
#define TEARDOWN()

#endif  // VIXL_INCLUDE_SIMULATOR
127
128
// Upper bound on the number of mismatches that each test prints in full.
// Mismatches beyond this limit are still counted, but are only summarised by a
// single "other errors follow" line.
static const unsigned kErrorReportLimit = 8U;
131
132
133// Overloaded versions of rawbits_to_double and rawbits_to_float for use in the
134// templated test functions.
135static float rawbits_to_fp(uint32_t bits) {
136  return rawbits_to_float(bits);
137}
138
139static double rawbits_to_fp(uint64_t bits) {
140  return rawbits_to_double(bits);
141}
142
143
144// MacroAssembler member function pointers to pass to the test dispatchers.
145typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
146                                                  const FPRegister& fn);
147typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
148                                                  const FPRegister& fn,
149                                                  const FPRegister& fm);
150typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
151                                                  const FPRegister& fn,
152                                                  const FPRegister& fm,
153                                                  const FPRegister& fa);
154typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
155                                                  const FPRegister& fm);
156typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
157                                                      double value);
158typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
159                                                    const FPRegister& fn);
160typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
161                                                      const FPRegister& fn,
162                                                      int fbits);
163typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
164                                                      const Register& rn,
165                                                      int fbits);
166// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
167//       consolidated into one routine.
168typedef void (MacroAssembler::*Test1OpNEONHelper_t)(
169  const VRegister& vd, const VRegister& vn);
170typedef void (MacroAssembler::*Test2OpNEONHelper_t)(
171  const VRegister& vd, const VRegister& vn, const VRegister& vm);
172typedef void (MacroAssembler::*TestByElementNEONHelper_t)(
173  const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index);
174typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
175  const VRegister& vd, int imm1, const VRegister& vn, int imm2);
176
177// This helps using the same typename for both the function pointer
178// and the array of immediates passed to helper routines.
179template <typename T>
180class Test2OpImmediateNEONHelper_t {
181 public:
182    typedef void (MacroAssembler::*mnemonic)(
183      const VRegister& vd, const VRegister& vn, T imm);
184};
185
186
// Returns the number of hex digits needed to print a value of the wider of
// the two template types, at two digits per byte (for example, 16 for a
// uint32_t / uint64_t pair).
//
// The wider size is selected with a plain conditional rather than std::max, so
// this helper does not depend on <algorithm>, which this file does not include
// itself.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const unsigned count = static_cast<unsigned>(
      (sizeof(Ta) > sizeof(Tb)) ? sizeof(Ta) : sizeof(Tb));
  // Eight bits per byte, four bits per hex digit.
  return (count * 8) / 4;
}
194
195
196// Standard test dispatchers.
197
198
199static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs,
200                           unsigned inputs_length, uintptr_t results,
201                           unsigned d_size, unsigned n_size) {
202  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
203  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
204
205  SETUP();
206  START();
207
208  // Roll up the loop to keep the code size down.
209  Label loop_n;
210
211  Register out = x0;
212  Register inputs_base = x1;
213  Register length = w2;
214  Register index_n = w3;
215
216  const int n_index_shift =
217      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
218
219  FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
220  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
221
222  __ Mov(out, results);
223  __ Mov(inputs_base, inputs);
224  __ Mov(length, inputs_length);
225
226  __ Mov(index_n, 0);
227  __ Bind(&loop_n);
228  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
229
230  {
231    SingleEmissionCheckScope guard(&masm);
232    (masm.*helper)(fd, fn);
233  }
234  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
235
236  __ Add(index_n, index_n, 1);
237  __ Cmp(index_n, inputs_length);
238  __ B(lo, &loop_n);
239
240  END();
241  RUN();
242  TEARDOWN();
243}
244
245
246// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
247// rawbits representations of doubles or floats. This ensures that exact bit
248// comparisons can be performed.
249template <typename Tn, typename Td>
250static void Test1Op(const char * name, Test1OpFPHelper_t helper,
251                    const Tn inputs[], unsigned inputs_length,
252                    const Td expected[], unsigned expected_length) {
253  VIXL_ASSERT(inputs_length > 0);
254
255  const unsigned results_length = inputs_length;
256  Td * results = new Td[results_length];
257
258  const unsigned d_bits = sizeof(Td) * 8;
259  const unsigned n_bits = sizeof(Tn) * 8;
260
261  Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
262                 reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
263
264  if (Test::sim_test_trace()) {
265    // Print the results.
266    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
267    for (unsigned d = 0; d < results_length; d++) {
268      printf("  0x%0*" PRIx64 ",\n",
269             d_bits / 4, static_cast<uint64_t>(results[d]));
270    }
271    printf("};\n");
272    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
273  } else {
274    // Check the results.
275    VIXL_CHECK(expected_length == results_length);
276    unsigned error_count = 0;
277    unsigned d = 0;
278    for (unsigned n = 0; n < inputs_length; n++, d++) {
279      if (results[d] != expected[d]) {
280        if (++error_count > kErrorReportLimit) continue;
281
282        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
283               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
284               name, rawbits_to_fp(inputs[n]));
285        printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
286               d_bits / 4, static_cast<uint64_t>(expected[d]),
287               rawbits_to_fp(expected[d]));
288        printf("  Found:    0x%0*" PRIx64 " (%g)\n",
289               d_bits / 4, static_cast<uint64_t>(results[d]),
290               rawbits_to_fp(results[d]));
291        printf("\n");
292      }
293    }
294    VIXL_ASSERT(d == expected_length);
295    if (error_count > kErrorReportLimit) {
296      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
297    }
298    VIXL_CHECK(error_count == 0);
299  }
300  delete[] results;
301}
302
303
304static void Test2Op_Helper(Test2OpFPHelper_t helper,
305                           uintptr_t inputs, unsigned inputs_length,
306                           uintptr_t results, unsigned reg_size) {
307  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
308
309  SETUP();
310  START();
311
312  // Roll up the loop to keep the code size down.
313  Label loop_n, loop_m;
314
315  Register out = x0;
316  Register inputs_base = x1;
317  Register length = w2;
318  Register index_n = w3;
319  Register index_m = w4;
320
321  bool double_op = reg_size == kDRegSize;
322  const int index_shift =
323      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
324
325  FPRegister fd = double_op ? d0 : s0;
326  FPRegister fn = double_op ? d1 : s1;
327  FPRegister fm = double_op ? d2 : s2;
328
329  __ Mov(out, results);
330  __ Mov(inputs_base, inputs);
331  __ Mov(length, inputs_length);
332
333  __ Mov(index_n, 0);
334  __ Bind(&loop_n);
335  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
336
337  __ Mov(index_m, 0);
338  __ Bind(&loop_m);
339  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
340
341  {
342    SingleEmissionCheckScope guard(&masm);
343    (masm.*helper)(fd, fn, fm);
344  }
345    __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
346
347  __ Add(index_m, index_m, 1);
348  __ Cmp(index_m, inputs_length);
349  __ B(lo, &loop_m);
350
351  __ Add(index_n, index_n, 1);
352  __ Cmp(index_n, inputs_length);
353  __ B(lo, &loop_n);
354
355  END();
356  RUN();
357  TEARDOWN();
358}
359
360
361// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
362// rawbits representations of doubles or floats. This ensures that exact bit
363// comparisons can be performed.
364template <typename T>
365static void Test2Op(const char * name, Test2OpFPHelper_t helper,
366                    const T inputs[], unsigned inputs_length,
367                    const T expected[], unsigned expected_length) {
368  VIXL_ASSERT(inputs_length > 0);
369
370  const unsigned results_length = inputs_length * inputs_length;
371  T * results = new T[results_length];
372
373  const unsigned bits = sizeof(T) * 8;
374
375  Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
376                 reinterpret_cast<uintptr_t>(results), bits);
377
378  if (Test::sim_test_trace()) {
379    // Print the results.
380    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
381    for (unsigned d = 0; d < results_length; d++) {
382      printf("  0x%0*" PRIx64 ",\n",
383             bits / 4, static_cast<uint64_t>(results[d]));
384    }
385    printf("};\n");
386    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
387  } else {
388    // Check the results.
389    VIXL_CHECK(expected_length == results_length);
390    unsigned error_count = 0;
391    unsigned d = 0;
392    for (unsigned n = 0; n < inputs_length; n++) {
393      for (unsigned m = 0; m < inputs_length; m++, d++) {
394        if (results[d] != expected[d]) {
395          if (++error_count > kErrorReportLimit) continue;
396
397          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
398                 name,
399                 bits / 4, static_cast<uint64_t>(inputs[n]),
400                 bits / 4, static_cast<uint64_t>(inputs[m]),
401                 name,
402                 rawbits_to_fp(inputs[n]),
403                 rawbits_to_fp(inputs[m]));
404          printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
405                 bits / 4, static_cast<uint64_t>(expected[d]),
406                 rawbits_to_fp(expected[d]));
407          printf("  Found:    0x%0*" PRIx64 " (%g)\n",
408                 bits / 4, static_cast<uint64_t>(results[d]),
409                 rawbits_to_fp(results[d]));
410          printf("\n");
411        }
412      }
413    }
414    VIXL_ASSERT(d == expected_length);
415    if (error_count > kErrorReportLimit) {
416      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
417    }
418    VIXL_CHECK(error_count == 0);
419  }
420  delete[] results;
421}
422
423
424static void Test3Op_Helper(Test3OpFPHelper_t helper,
425                           uintptr_t inputs, unsigned inputs_length,
426                           uintptr_t results, unsigned reg_size) {
427  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
428
429  SETUP();
430  START();
431
432  // Roll up the loop to keep the code size down.
433  Label loop_n, loop_m, loop_a;
434
435  Register out = x0;
436  Register inputs_base = x1;
437  Register length = w2;
438  Register index_n = w3;
439  Register index_m = w4;
440  Register index_a = w5;
441
442  bool double_op = reg_size == kDRegSize;
443  const int index_shift =
444      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
445
446  FPRegister fd = double_op ? d0 : s0;
447  FPRegister fn = double_op ? d1 : s1;
448  FPRegister fm = double_op ? d2 : s2;
449  FPRegister fa = double_op ? d3 : s3;
450
451  __ Mov(out, results);
452  __ Mov(inputs_base, inputs);
453  __ Mov(length, inputs_length);
454
455  __ Mov(index_n, 0);
456  __ Bind(&loop_n);
457  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
458
459  __ Mov(index_m, 0);
460  __ Bind(&loop_m);
461  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
462
463  __ Mov(index_a, 0);
464  __ Bind(&loop_a);
465  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
466
467  {
468    SingleEmissionCheckScope guard(&masm);
469    (masm.*helper)(fd, fn, fm, fa);
470  }
471  __ Str(fd, MemOperand(out, fd.SizeInBytes(), PostIndex));
472
473  __ Add(index_a, index_a, 1);
474  __ Cmp(index_a, inputs_length);
475  __ B(lo, &loop_a);
476
477  __ Add(index_m, index_m, 1);
478  __ Cmp(index_m, inputs_length);
479  __ B(lo, &loop_m);
480
481  __ Add(index_n, index_n, 1);
482  __ Cmp(index_n, inputs_length);
483  __ B(lo, &loop_n);
484
485  END();
486  RUN();
487  TEARDOWN();
488}
489
490
491// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
492// rawbits representations of doubles or floats. This ensures that exact bit
493// comparisons can be performed.
494template <typename T>
495static void Test3Op(const char * name, Test3OpFPHelper_t helper,
496                    const T inputs[], unsigned inputs_length,
497                    const T expected[], unsigned expected_length) {
498  VIXL_ASSERT(inputs_length > 0);
499
500  const unsigned results_length = inputs_length * inputs_length * inputs_length;
501  T * results = new T[results_length];
502
503  const unsigned bits = sizeof(T) * 8;
504
505  Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
506                 reinterpret_cast<uintptr_t>(results), bits);
507
508  if (Test::sim_test_trace()) {
509    // Print the results.
510    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
511    for (unsigned d = 0; d < results_length; d++) {
512      printf("  0x%0*" PRIx64 ",\n",
513             bits / 4, static_cast<uint64_t>(results[d]));
514    }
515    printf("};\n");
516    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
517  } else {
518    // Check the results.
519    VIXL_CHECK(expected_length == results_length);
520    unsigned error_count = 0;
521    unsigned d = 0;
522    for (unsigned n = 0; n < inputs_length; n++) {
523      for (unsigned m = 0; m < inputs_length; m++) {
524        for (unsigned a = 0; a < inputs_length; a++, d++) {
525          if (results[d] != expected[d]) {
526            if (++error_count > kErrorReportLimit) continue;
527
528            printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
529                   " (%s %g %g %g):\n",
530                   name,
531                   bits / 4, static_cast<uint64_t>(inputs[n]),
532                   bits / 4, static_cast<uint64_t>(inputs[m]),
533                   bits / 4, static_cast<uint64_t>(inputs[a]),
534                   name,
535                   rawbits_to_fp(inputs[n]),
536                   rawbits_to_fp(inputs[m]),
537                   rawbits_to_fp(inputs[a]));
538            printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
539                   bits / 4, static_cast<uint64_t>(expected[d]),
540                   rawbits_to_fp(expected[d]));
541            printf("  Found:    0x%0*" PRIx64 " (%g)\n",
542                   bits / 4, static_cast<uint64_t>(results[d]),
543                   rawbits_to_fp(results[d]));
544            printf("\n");
545          }
546        }
547      }
548    }
549    VIXL_ASSERT(d == expected_length);
550    if (error_count > kErrorReportLimit) {
551      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
552    }
553    VIXL_CHECK(error_count == 0);
554  }
555  delete[] results;
556}
557
558
559static void TestCmp_Helper(TestFPCmpHelper_t helper,
560                           uintptr_t inputs, unsigned inputs_length,
561                           uintptr_t results, unsigned reg_size) {
562  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
563
564  SETUP();
565  START();
566
567  // Roll up the loop to keep the code size down.
568  Label loop_n, loop_m;
569
570  Register out = x0;
571  Register inputs_base = x1;
572  Register length = w2;
573  Register index_n = w3;
574  Register index_m = w4;
575  Register flags = x5;
576
577  bool double_op = reg_size == kDRegSize;
578  const int index_shift =
579      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
580
581  FPRegister fn = double_op ? d1 : s1;
582  FPRegister fm = double_op ? d2 : s2;
583
584  __ Mov(out, results);
585  __ Mov(inputs_base, inputs);
586  __ Mov(length, inputs_length);
587
588  __ Mov(index_n, 0);
589  __ Bind(&loop_n);
590  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
591
592  __ Mov(index_m, 0);
593  __ Bind(&loop_m);
594  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
595
596  {
597    SingleEmissionCheckScope guard(&masm);
598    (masm.*helper)(fn, fm);
599  }
600  __ Mrs(flags, NZCV);
601  __ Ubfx(flags, flags, 28, 4);
602  __ Strb(flags, MemOperand(out, 1, PostIndex));
603
604  __ Add(index_m, index_m, 1);
605  __ Cmp(index_m, inputs_length);
606  __ B(lo, &loop_m);
607
608  __ Add(index_n, index_n, 1);
609  __ Cmp(index_n, inputs_length);
610  __ B(lo, &loop_n);
611
612  END();
613  RUN();
614  TEARDOWN();
615}
616
617
618// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
619// rawbits representations of doubles or floats. This ensures that exact bit
620// comparisons can be performed.
621template <typename T>
622static void TestCmp(const char * name, TestFPCmpHelper_t helper,
623                    const T inputs[], unsigned inputs_length,
624                    const uint8_t expected[], unsigned expected_length) {
625  VIXL_ASSERT(inputs_length > 0);
626
627  const unsigned results_length = inputs_length * inputs_length;
628  uint8_t * results = new uint8_t[results_length];
629
630  const unsigned bits = sizeof(T) * 8;
631
632  TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
633                 reinterpret_cast<uintptr_t>(results), bits);
634
635  if (Test::sim_test_trace()) {
636    // Print the results.
637    printf("const uint8_t kExpected_%s[] = {\n", name);
638    for (unsigned d = 0; d < results_length; d++) {
639      // Each NZCV result only requires 4 bits.
640      VIXL_ASSERT((results[d] & 0xf) == results[d]);
641      printf("  0x%" PRIx8 ",\n", results[d]);
642    }
643    printf("};\n");
644    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
645  } else {
646    // Check the results.
647    VIXL_CHECK(expected_length == results_length);
648    unsigned error_count = 0;
649    unsigned d = 0;
650    for (unsigned n = 0; n < inputs_length; n++) {
651      for (unsigned m = 0; m < inputs_length; m++, d++) {
652        if (results[d] != expected[d]) {
653          if (++error_count > kErrorReportLimit) continue;
654
655          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
656                 name,
657                 bits / 4, static_cast<uint64_t>(inputs[n]),
658                 bits / 4, static_cast<uint64_t>(inputs[m]),
659                 name,
660                 rawbits_to_fp(inputs[n]),
661                 rawbits_to_fp(inputs[m]));
662          printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
663                 (expected[d] & 0x8) ? 'N' : 'n',
664                 (expected[d] & 0x4) ? 'Z' : 'z',
665                 (expected[d] & 0x2) ? 'C' : 'c',
666                 (expected[d] & 0x1) ? 'V' : 'v',
667                 expected[d]);
668          printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
669                 (results[d] & 0x8) ? 'N' : 'n',
670                 (results[d] & 0x4) ? 'Z' : 'z',
671                 (results[d] & 0x2) ? 'C' : 'c',
672                 (results[d] & 0x1) ? 'V' : 'v',
673                 results[d]);
674          printf("\n");
675        }
676      }
677    }
678    VIXL_ASSERT(d == expected_length);
679    if (error_count > kErrorReportLimit) {
680      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
681    }
682    VIXL_CHECK(error_count == 0);
683  }
684  delete[] results;
685}
686
687
688static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
689                               uintptr_t inputs, unsigned inputs_length,
690                               uintptr_t results, unsigned reg_size) {
691  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
692
693  SETUP();
694  START();
695
696  // Roll up the loop to keep the code size down.
697  Label loop_n, loop_m;
698
699  Register out = x0;
700  Register inputs_base = x1;
701  Register length = w2;
702  Register index_n = w3;
703  Register flags = x4;
704
705  bool double_op = reg_size == kDRegSize;
706  const int index_shift =
707      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
708
709  FPRegister fn = double_op ? d1 : s1;
710
711  __ Mov(out, results);
712  __ Mov(inputs_base, inputs);
713  __ Mov(length, inputs_length);
714
715  __ Mov(index_n, 0);
716  __ Bind(&loop_n);
717  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
718
719  {
720    SingleEmissionCheckScope guard(&masm);
721    (masm.*helper)(fn, 0.0);
722  }
723  __ Mrs(flags, NZCV);
724  __ Ubfx(flags, flags, 28, 4);
725  __ Strb(flags, MemOperand(out, 1, PostIndex));
726
727  __ Add(index_n, index_n, 1);
728  __ Cmp(index_n, inputs_length);
729  __ B(lo, &loop_n);
730
731  END();
732  RUN();
733  TEARDOWN();
734}
735
736
737// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
738// rawbits representations of doubles or floats. This ensures that exact bit
739// comparisons can be performed.
740template <typename T>
741static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper,
742                        const T inputs[], unsigned inputs_length,
743                        const uint8_t expected[], unsigned expected_length) {
744  VIXL_ASSERT(inputs_length > 0);
745
746  const unsigned results_length = inputs_length;
747  uint8_t * results = new uint8_t[results_length];
748
749  const unsigned bits = sizeof(T) * 8;
750
751  TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
752                     reinterpret_cast<uintptr_t>(results), bits);
753
754  if (Test::sim_test_trace()) {
755    // Print the results.
756    printf("const uint8_t kExpected_%s[] = {\n", name);
757    for (unsigned d = 0; d < results_length; d++) {
758      // Each NZCV result only requires 4 bits.
759      VIXL_ASSERT((results[d] & 0xf) == results[d]);
760      printf("  0x%" PRIx8 ",\n", results[d]);
761    }
762    printf("};\n");
763    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
764  } else {
765    // Check the results.
766    VIXL_CHECK(expected_length == results_length);
767    unsigned error_count = 0;
768    unsigned d = 0;
769    for (unsigned n = 0; n < inputs_length; n++, d++) {
770      if (results[d] != expected[d]) {
771        if (++error_count > kErrorReportLimit) continue;
772
773        printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
774               name,
775               bits / 4, static_cast<uint64_t>(inputs[n]),
776               bits / 4, 0,
777               name,
778               rawbits_to_fp(inputs[n]));
779        printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
780               (expected[d] & 0x8) ? 'N' : 'n',
781               (expected[d] & 0x4) ? 'Z' : 'z',
782               (expected[d] & 0x2) ? 'C' : 'c',
783               (expected[d] & 0x1) ? 'V' : 'v',
784               expected[d]);
785        printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
786               (results[d] & 0x8) ? 'N' : 'n',
787               (results[d] & 0x4) ? 'Z' : 'z',
788               (results[d] & 0x2) ? 'C' : 'c',
789               (results[d] & 0x1) ? 'V' : 'v',
790               results[d]);
791        printf("\n");
792      }
793    }
794    VIXL_ASSERT(d == expected_length);
795    if (error_count > kErrorReportLimit) {
796      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
797    }
798    VIXL_CHECK(error_count == 0);
799  }
800  delete[] results;
801}
802
803
804static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
805                                 uintptr_t inputs, unsigned inputs_length,
806                                 uintptr_t results,
807                                 unsigned d_size, unsigned n_size) {
808  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
809  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
810
811  SETUP();
812  START();
813
814  // Roll up the loop to keep the code size down.
815  Label loop_n;
816
817  Register out = x0;
818  Register inputs_base = x1;
819  Register length = w2;
820  Register index_n = w3;
821
822  const int n_index_shift =
823      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
824
825  Register rd = (d_size == kXRegSize) ? x10 : w10;
826  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
827
828  __ Mov(out, results);
829  __ Mov(inputs_base, inputs);
830  __ Mov(length, inputs_length);
831
832  __ Mov(index_n, 0);
833  __ Bind(&loop_n);
834  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
835
836  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
837    {
838      SingleEmissionCheckScope guard(&masm);
839      (masm.*helper)(rd, fn, fbits);
840    }
841    __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
842  }
843
844  __ Add(index_n, index_n, 1);
845  __ Cmp(index_n, inputs_length);
846  __ B(lo, &loop_n);
847
848  END();
849  RUN();
850  TEARDOWN();
851}
852
853
854static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs,
855                               unsigned inputs_length, uintptr_t results,
856                               unsigned d_size, unsigned n_size) {
857  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
858  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
859
860  SETUP();
861  START();
862
863  // Roll up the loop to keep the code size down.
864  Label loop_n;
865
866  Register out = x0;
867  Register inputs_base = x1;
868  Register length = w2;
869  Register index_n = w3;
870
871  const int n_index_shift =
872      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
873
874  Register rd = (d_size == kXRegSize) ? x10 : w10;
875  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
876
877  __ Mov(out, results);
878  __ Mov(inputs_base, inputs);
879  __ Mov(length, inputs_length);
880
881  __ Mov(index_n, 0);
882  __ Bind(&loop_n);
883  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
884
885  {
886    SingleEmissionCheckScope guard(&masm);
887    (masm.*helper)(rd, fn);
888  }
889  __ Str(rd, MemOperand(out, rd.SizeInBytes(), PostIndex));
890
891  __ Add(index_n, index_n, 1);
892  __ Cmp(index_n, inputs_length);
893  __ B(lo, &loop_n);
894
895  END();
896  RUN();
897  TEARDOWN();
898}
899
900
901// Test FP instructions.
902//  - The inputs[] array should be an array of rawbits representations of
903//    doubles or floats. This ensures that exact bit comparisons can be
904//    performed.
905//  - The expected[] array should be an array of signed integers.
906template <typename Tn, typename Td>
907static void TestFPToS(const char * name, TestFPToIntHelper_t helper,
908                      const Tn inputs[], unsigned inputs_length,
909                      const Td expected[], unsigned expected_length) {
910  VIXL_ASSERT(inputs_length > 0);
911
912  const unsigned results_length = inputs_length;
913  Td * results = new Td[results_length];
914
915  const unsigned d_bits = sizeof(Td) * 8;
916  const unsigned n_bits = sizeof(Tn) * 8;
917
918  TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length,
919                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
920
921  if (Test::sim_test_trace()) {
922    // Print the results.
923    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
924    // There is no simple C++ literal for INT*_MIN that doesn't produce
925    // warnings, so we use an appropriate constant in that case instead.
926    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
927    // the like) avoids warnings about comparing values with differing ranges.
928    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
929    const int64_t int_d_min = -(int_d_max) - 1;
930    for (unsigned d = 0; d < results_length; d++) {
931      if (results[d] == int_d_min) {
932        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
933      } else {
934        // Some constants (such as those between INT32_MAX and UINT32_MAX)
935        // trigger compiler warnings. To avoid these warnings, use an
936        // appropriate macro to make the type explicit.
937        int64_t result_int64 = static_cast<int64_t>(results[d]);
938        if (result_int64 >= 0) {
939          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
940        } else {
941          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
942        }
943      }
944    }
945    printf("};\n");
946    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
947  } else {
948    // Check the results.
949    VIXL_CHECK(expected_length == results_length);
950    unsigned error_count = 0;
951    unsigned d = 0;
952    for (unsigned n = 0; n < inputs_length; n++, d++) {
953      if (results[d] != expected[d]) {
954        if (++error_count > kErrorReportLimit) continue;
955
956        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
957               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
958               name, rawbits_to_fp(inputs[n]));
959        printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
960               d_bits / 4, static_cast<uint64_t>(expected[d]),
961               static_cast<int64_t>(expected[d]));
962        printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
963               d_bits / 4, static_cast<uint64_t>(results[d]),
964               static_cast<int64_t>(results[d]));
965        printf("\n");
966      }
967    }
968    VIXL_ASSERT(d == expected_length);
969    if (error_count > kErrorReportLimit) {
970      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
971    }
972    VIXL_CHECK(error_count == 0);
973  }
974  delete[] results;
975}
976
977
978// Test FP instructions.
979//  - The inputs[] array should be an array of rawbits representations of
980//    doubles or floats. This ensures that exact bit comparisons can be
981//    performed.
982//  - The expected[] array should be an array of unsigned integers.
983template <typename Tn, typename Td>
984static void TestFPToU(const char * name, TestFPToIntHelper_t helper,
985                      const Tn inputs[], unsigned inputs_length,
986                      const Td expected[], unsigned expected_length) {
987  VIXL_ASSERT(inputs_length > 0);
988
989  const unsigned results_length = inputs_length;
990  Td * results = new Td[results_length];
991
992  const unsigned d_bits = sizeof(Td) * 8;
993  const unsigned n_bits = sizeof(Tn) * 8;
994
995  TestFPToInt_Helper(helper,
996                     reinterpret_cast<uintptr_t>(inputs), inputs_length,
997                     reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
998
999  if (Test::sim_test_trace()) {
1000    // Print the results.
1001    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1002    for (unsigned d = 0; d < results_length; d++) {
1003      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1004    }
1005    printf("};\n");
1006    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1007  } else {
1008    // Check the results.
1009    VIXL_CHECK(expected_length == results_length);
1010    unsigned error_count = 0;
1011    unsigned d = 0;
1012    for (unsigned n = 0; n < inputs_length; n++, d++) {
1013      if (results[d] != expected[d]) {
1014        if (++error_count > kErrorReportLimit) continue;
1015
1016        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1017               name, n_bits / 4, static_cast<uint64_t>(inputs[n]),
1018               name, rawbits_to_fp(inputs[n]));
1019        printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1020               d_bits / 4, static_cast<uint64_t>(expected[d]),
1021               static_cast<uint64_t>(expected[d]));
1022        printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1023               d_bits / 4, static_cast<uint64_t>(results[d]),
1024               static_cast<uint64_t>(results[d]));
1025        printf("\n");
1026      }
1027    }
1028    VIXL_ASSERT(d == expected_length);
1029    if (error_count > kErrorReportLimit) {
1030      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1031    }
1032    VIXL_CHECK(error_count == 0);
1033  }
1034  delete[] results;
1035}
1036
1037
1038// Test FP instructions.
1039//  - The inputs[] array should be an array of rawbits representations of
1040//    doubles or floats. This ensures that exact bit comparisons can be
1041//    performed.
1042//  - The expected[] array should be an array of signed integers.
1043template <typename Tn, typename Td>
1044static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper,
1045                           const Tn inputs[], unsigned inputs_length,
1046                           const Td expected[], unsigned expected_length) {
1047  VIXL_ASSERT(inputs_length > 0);
1048
1049  const unsigned d_bits = sizeof(Td) * 8;
1050  const unsigned n_bits = sizeof(Tn) * 8;
1051
1052  const unsigned results_length = inputs_length * (d_bits + 1);
1053  Td * results = new Td[results_length];
1054
1055  TestFPToFixed_Helper(helper,
1056                       reinterpret_cast<uintptr_t>(inputs), inputs_length,
1057                       reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1058
1059  if (Test::sim_test_trace()) {
1060    // Print the results.
1061    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1062    // There is no simple C++ literal for INT*_MIN that doesn't produce
1063    // warnings, so we use an appropriate constant in that case instead.
1064    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1065    // the like) avoids warnings about comparing values with differing ranges.
1066    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1067    const int64_t int_d_min = -(int_d_max) - 1;
1068    for (unsigned d = 0; d < results_length; d++) {
1069      if (results[d] == int_d_min) {
1070        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1071      } else {
1072        // Some constants (such as those between INT32_MAX and UINT32_MAX)
1073        // trigger compiler warnings. To avoid these warnings, use an
1074        // appropriate macro to make the type explicit.
1075        int64_t result_int64 = static_cast<int64_t>(results[d]);
1076        if (result_int64 >= 0) {
1077          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1078        } else {
1079          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1080        }
1081      }
1082    }
1083    printf("};\n");
1084    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1085  } else {
1086    // Check the results.
1087    VIXL_CHECK(expected_length == results_length);
1088    unsigned error_count = 0;
1089    unsigned d = 0;
1090    for (unsigned n = 0; n < inputs_length; n++) {
1091      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1092        if (results[d] != expected[d]) {
1093          if (++error_count > kErrorReportLimit) continue;
1094
1095          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1096                 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1097                 name, rawbits_to_fp(inputs[n]), fbits);
1098          printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1099                 d_bits / 4, static_cast<uint64_t>(expected[d]),
1100                 static_cast<int64_t>(expected[d]));
1101          printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1102                 d_bits / 4, static_cast<uint64_t>(results[d]),
1103                 static_cast<int64_t>(results[d]));
1104          printf("\n");
1105        }
1106      }
1107    }
1108    VIXL_ASSERT(d == expected_length);
1109    if (error_count > kErrorReportLimit) {
1110      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1111    }
1112    VIXL_CHECK(error_count == 0);
1113  }
1114  delete[] results;
1115}
1116
1117
1118// Test FP instructions.
1119//  - The inputs[] array should be an array of rawbits representations of
1120//    doubles or floats. This ensures that exact bit comparisons can be
1121//    performed.
1122//  - The expected[] array should be an array of unsigned integers.
1123template <typename Tn, typename Td>
1124static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper,
1125                           const Tn inputs[], unsigned inputs_length,
1126                           const Td expected[], unsigned expected_length) {
1127  VIXL_ASSERT(inputs_length > 0);
1128
1129  const unsigned d_bits = sizeof(Td) * 8;
1130  const unsigned n_bits = sizeof(Tn) * 8;
1131
1132  const unsigned results_length = inputs_length * (d_bits + 1);
1133  Td * results = new Td[results_length];
1134
1135  TestFPToFixed_Helper(helper,
1136                       reinterpret_cast<uintptr_t>(inputs), inputs_length,
1137                       reinterpret_cast<uintptr_t>(results), d_bits, n_bits);
1138
1139  if (Test::sim_test_trace()) {
1140    // Print the results.
1141    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1142    for (unsigned d = 0; d < results_length; d++) {
1143      printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1144    }
1145    printf("};\n");
1146    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1147  } else {
1148    // Check the results.
1149    VIXL_CHECK(expected_length == results_length);
1150    unsigned error_count = 0;
1151    unsigned d = 0;
1152    for (unsigned n = 0; n < inputs_length; n++) {
1153      for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1154        if (results[d] != expected[d]) {
1155          if (++error_count > kErrorReportLimit) continue;
1156
1157          printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1158                 name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits,
1159                 name, rawbits_to_fp(inputs[n]), fbits);
1160          printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1161                 d_bits / 4, static_cast<uint64_t>(expected[d]),
1162                 static_cast<uint64_t>(expected[d]));
1163          printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1164                 d_bits / 4, static_cast<uint64_t>(results[d]),
1165                 static_cast<uint64_t>(results[d]));
1166          printf("\n");
1167        }
1168      }
1169    }
1170    VIXL_ASSERT(d == expected_length);
1171    if (error_count > kErrorReportLimit) {
1172      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1173    }
1174    VIXL_CHECK(error_count == 0);
1175  }
1176  delete[] results;
1177}
1178
1179
1180// ==== Tests for instructions of the form <INST> VReg, VReg. ====
1181
1182
1183static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1184                               uintptr_t inputs_n, unsigned inputs_n_length,
1185                               uintptr_t results,
1186                               VectorFormat vd_form,
1187                               VectorFormat vn_form) {
1188  VIXL_ASSERT(vd_form != kFormatUndefined);
1189  VIXL_ASSERT(vn_form != kFormatUndefined);
1190
1191  SETUP();
1192  START();
1193
1194  // Roll up the loop to keep the code size down.
1195  Label loop_n;
1196
1197  Register out = x0;
1198  Register inputs_n_base = x1;
1199  Register inputs_n_last_16bytes = x3;
1200  Register index_n = x5;
1201
1202  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1203  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1204  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1205
1206  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1207  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1208  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1209  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1210  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1211
1212
1213  // These will be either a D- or a Q-register form, with a single lane
1214  // (for use in scalar load and store operations).
1215  VRegister vd = VRegister(0, vd_bits);
1216  VRegister vn = v1.V16B();
1217  VRegister vntmp = v3.V16B();
1218
1219  // These will have the correct format for use when calling 'helper'.
1220  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1221  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1222
1223  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1224  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1225
1226  __ Mov(out, results);
1227
1228  __ Mov(inputs_n_base, inputs_n);
1229  __ Mov(inputs_n_last_16bytes,
1230         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1231
1232  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1233
1234  __ Mov(index_n, 0);
1235  __ Bind(&loop_n);
1236
1237  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1238                                  vn_lane_bytes_log2));
1239  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1240
1241  // Set the destination to zero.
1242  // TODO: Setting the destination to values other than zero
1243  //       might be a better test for instructions such as sqxtn2
1244  //       which may leave parts of V registers unchanged.
1245  __ Movi(vd.V16B(), 0);
1246
1247  {
1248    SingleEmissionCheckScope guard(&masm);
1249    (masm.*helper)(vd_helper, vn_helper);
1250  }
1251  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1252
1253  __ Add(index_n, index_n, 1);
1254  __ Cmp(index_n, inputs_n_length);
1255  __ B(lo, &loop_n);
1256
1257  END();
1258  RUN();
1259  TEARDOWN();
1260}
1261
1262
1263// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1264// arrays of rawbit representation of input values. This ensures that
1265// exact bit comparisons can be performed.
1266template <typename Td, typename Tn>
1267static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper,
1268                        const Tn inputs_n[], unsigned inputs_n_length,
1269                        const Td expected[], unsigned expected_length,
1270                        VectorFormat vd_form,
1271                        VectorFormat vn_form) {
1272  VIXL_ASSERT(inputs_n_length > 0);
1273
1274  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1275  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1276  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1277
1278  const unsigned results_length = inputs_n_length;
1279  Td* results = new Td[results_length * vd_lane_count];
1280  const unsigned lane_bit = sizeof(Td) * 8;
1281  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1282
1283  Test1OpNEON_Helper(helper,
1284                     reinterpret_cast<uintptr_t>(inputs_n),
1285                     inputs_n_length,
1286                     reinterpret_cast<uintptr_t>(results),
1287                     vd_form, vn_form);
1288
1289  if (Test::sim_test_trace()) {
1290    // Print the results.
1291    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1292    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1293      printf(" ");
1294      // Output a separate result for each element of the result vector.
1295      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1296        unsigned index = lane + (iteration * vd_lane_count);
1297        printf(" 0x%0*" PRIx64 ",",
1298               lane_len_in_hex,
1299               static_cast<uint64_t>(results[index]));
1300      }
1301      printf("\n");
1302    }
1303
1304    printf("};\n");
1305    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1306           name,
1307           results_length);
1308  } else {
1309    // Check the results.
1310    VIXL_CHECK(expected_length == results_length);
1311    unsigned error_count = 0;
1312    unsigned d = 0;
1313    const char* padding = "                    ";
1314    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1315    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1316      bool error_in_vector = false;
1317
1318      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1319        unsigned output_index = (n * vd_lane_count) + lane;
1320
1321        if (results[output_index] != expected[output_index]) {
1322          error_in_vector = true;
1323          break;
1324        }
1325      }
1326
1327      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1328        printf("%s\n", name);
1329        printf(" Vn%.*s| Vd%.*s| Expected\n",
1330                lane_len_in_hex+1, padding,
1331                lane_len_in_hex+1, padding);
1332
1333        const unsigned first_index_n =
1334          inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1335
1336        for (unsigned lane = 0;
1337             lane < std::max(vd_lane_count, vn_lane_count);
1338             lane++) {
1339          unsigned output_index = (n * vd_lane_count) + lane;
1340          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1341
1342          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1343                  "| 0x%0*" PRIx64 "\n",
1344                  results[output_index] != expected[output_index] ? '*' : ' ',
1345                  lane_len_in_hex,
1346                  static_cast<uint64_t>(inputs_n[input_index_n]),
1347                  lane_len_in_hex,
1348                  static_cast<uint64_t>(results[output_index]),
1349                  lane_len_in_hex,
1350                  static_cast<uint64_t>(expected[output_index]));
1351        }
1352      }
1353    }
1354    VIXL_ASSERT(d == expected_length);
1355    if (error_count > kErrorReportLimit) {
1356      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1357    }
1358    VIXL_CHECK(error_count == 0);
1359  }
1360  delete[] results;
1361}
1362
1363
1364// ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1365//      where <V> is one of B, H, S or D registers.
1366//      e.g. saddlv H1, v0.8B
1367
1368// TODO: Change tests to store all lanes of the resulting V register.
1369//       Some tests store all 128 bits of the resulting V register to
1370//       check the simulator's behaviour on the rest of the register.
1371//       This is better than storing the affected lanes only.
1372//       Change any tests such as the 'Across' template to do the same.
1373
1374static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1375                                     uintptr_t inputs_n,
1376                                     unsigned inputs_n_length,
1377                                     uintptr_t results,
1378                                     VectorFormat vd_form,
1379                                     VectorFormat vn_form) {
1380  VIXL_ASSERT(vd_form != kFormatUndefined);
1381  VIXL_ASSERT(vn_form != kFormatUndefined);
1382
1383  SETUP();
1384  START();
1385
1386  // Roll up the loop to keep the code size down.
1387  Label loop_n;
1388
1389  Register out = x0;
1390  Register inputs_n_base = x1;
1391  Register inputs_n_last_vector = x3;
1392  Register index_n = x5;
1393
1394  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1395  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1396
1397  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1398  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1399  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1400  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1401  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1402
1403
1404  // These will be either a D- or a Q-register form, with a single lane
1405  // (for use in scalar load and store operations).
1406  VRegister vd = VRegister(0, vd_bits);
1407  VRegister vn = VRegister(1, vn_bits);
1408  VRegister vntmp = VRegister(3, vn_bits);
1409
1410  // These will have the correct format for use when calling 'helper'.
1411  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1412
1413  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1414  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1415
1416  // Same registers for use in the 'ext' instructions.
1417  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1418  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1419
1420  __ Mov(out, results);
1421
1422  __ Mov(inputs_n_base, inputs_n);
1423  __ Mov(inputs_n_last_vector,
1424         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1425
1426  __ Ldr(vn, MemOperand(inputs_n_last_vector));
1427
1428  __ Mov(index_n, 0);
1429  __ Bind(&loop_n);
1430
1431  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1432                                  vn_lane_bytes_log2));
1433  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1434
1435  // Set the destination to zero for tests such as '[r]shrn2'.
1436  // TODO: Setting the destination to values other than zero
1437  //       might be a better test for instructions such as sqxtn2
1438  //       which may leave parts of V registers unchanged.
1439  __ Movi(vd.V16B(), 0);
1440
1441  {
1442    SingleEmissionCheckScope guard(&masm);
1443    (masm.*helper)(vd, vn_helper);
1444  }
1445  __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1446
1447  __ Add(index_n, index_n, 1);
1448  __ Cmp(index_n, inputs_n_length);
1449  __ B(lo, &loop_n);
1450
1451  END();
1452  RUN();
1453  TEARDOWN();
1454}
1455
1456// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1457// arrays of rawbit representation of input values. This ensures that
1458// exact bit comparisons can be performed.
1459template <typename Td, typename Tn>
1460static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper,
1461                              const Tn inputs_n[], unsigned inputs_n_length,
1462                              const Td expected[], unsigned expected_length,
1463                              VectorFormat vd_form,
1464                              VectorFormat vn_form) {
1465  VIXL_ASSERT(inputs_n_length > 0);
1466
1467  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1468
1469  const unsigned results_length = inputs_n_length;
1470  Td* results = new Td[results_length * vd_lane_count];
1471  const unsigned lane_bit = sizeof(Td) * 8;
1472  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1473
1474  Test1OpAcrossNEON_Helper(helper,
1475                           reinterpret_cast<uintptr_t>(inputs_n),
1476                           inputs_n_length,
1477                           reinterpret_cast<uintptr_t>(results),
1478                           vd_form, vn_form);
1479
1480  if (Test::sim_test_trace()) {
1481    // Print the results.
1482    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1483    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1484      printf(" ");
1485      // Output a separate result for each element of the result vector.
1486      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1487        unsigned index = lane + (iteration * vd_lane_count);
1488        printf(" 0x%0*" PRIx64 ",",
1489               lane_len_in_hex,
1490               static_cast<uint64_t>(results[index]));
1491      }
1492      printf("\n");
1493    }
1494
1495    printf("};\n");
1496    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1497           name,
1498           results_length);
1499  } else {
1500    // Check the results.
1501    VIXL_CHECK(expected_length == results_length);
1502    unsigned error_count = 0;
1503    unsigned d = 0;
1504    const char* padding = "                    ";
1505    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1506    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1507      bool error_in_vector = false;
1508
1509      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1510        unsigned output_index = (n * vd_lane_count) + lane;
1511
1512        if (results[output_index] != expected[output_index]) {
1513          error_in_vector = true;
1514          break;
1515        }
1516      }
1517
1518      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1519        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1520
1521        printf("%s\n", name);
1522        printf(" Vn%.*s| Vd%.*s| Expected\n",
1523                lane_len_in_hex+1, padding,
1524                lane_len_in_hex+1, padding);
1525
1526        // TODO: In case of an error, all tests print out as many elements as
1527        //       there are lanes in the output or input vectors. This way
1528        //       the viewer can read all the values that were needed for the
1529        //       operation but the output contains also unnecessary values.
1530        //       These prints can be improved according to the arguments
1531        //       passed to test functions.
1532        //       This output for the 'Across' category has the required
1533        //       modifications.
1534        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1535          unsigned output_index = n * vd_lane_count;
1536          unsigned input_index_n = (inputs_n_length - vn_lane_count +
1537              n + 1 + lane) % inputs_n_length;
1538
1539          if (vn_lane_count-1 == lane) {  // Is this the last lane?
1540            // Print the result element(s) in the last lane only.
1541            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1542                  "| 0x%0*" PRIx64 "\n",
1543                  results[output_index] != expected[output_index] ? '*' : ' ',
1544                  lane_len_in_hex,
1545                  static_cast<uint64_t>(inputs_n[input_index_n]),
1546                  lane_len_in_hex,
1547                  static_cast<uint64_t>(results[output_index]),
1548                  lane_len_in_hex,
1549                  static_cast<uint64_t>(expected[output_index]));
1550          } else {
1551            printf(" 0x%0*" PRIx64 " |   %.*s|   %.*s\n",
1552                  lane_len_in_hex,
1553                  static_cast<uint64_t>(inputs_n[input_index_n]),
1554                  lane_len_in_hex+1, padding,
1555                  lane_len_in_hex+1, padding);
1556          }
1557        }
1558      }
1559    }
1560    VIXL_ASSERT(d == expected_length);
1561    if (error_count > kErrorReportLimit) {
1562      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1563    }
1564    VIXL_CHECK(error_count == 0);
1565  }
1566  delete[] results;
1567}
1568
1569
1570// ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1571
1572// TODO: Iterate over inputs_d once the traces file is split.
1573
1574static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1575                               uintptr_t inputs_d,
1576                               uintptr_t inputs_n, unsigned inputs_n_length,
1577                               uintptr_t inputs_m, unsigned inputs_m_length,
1578                               uintptr_t results,
1579                               VectorFormat vd_form,
1580                               VectorFormat vn_form,
1581                               VectorFormat vm_form) {
1582  VIXL_ASSERT(vd_form != kFormatUndefined);
1583  VIXL_ASSERT(vn_form != kFormatUndefined);
1584  VIXL_ASSERT(vm_form != kFormatUndefined);
1585
1586  SETUP();
1587  START();
1588
1589  // Roll up the loop to keep the code size down.
1590  Label loop_n, loop_m;
1591
1592  Register out = x0;
1593  Register inputs_n_base = x1;
1594  Register inputs_m_base = x2;
1595  Register inputs_d_base = x3;
1596  Register inputs_n_last_16bytes = x4;
1597  Register inputs_m_last_16bytes = x5;
1598  Register index_n = x6;
1599  Register index_m = x7;
1600
1601  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1602  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1603  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1604
1605  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1606  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1607  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1608  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1609  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1610
1611  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1612  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1613  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1614  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1615  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1616
1617
1618  // Always load and store 128 bits regardless of the format.
1619  VRegister vd = v0.V16B();
1620  VRegister vn = v1.V16B();
1621  VRegister vm = v2.V16B();
1622  VRegister vntmp = v3.V16B();
1623  VRegister vmtmp = v4.V16B();
1624  VRegister vres = v5.V16B();
1625
1626  // These will have the correct format for calling the 'helper'.
1627  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1628  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1629  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1630
1631  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1632  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1633  VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1634
1635  __ Mov(out, results);
1636
1637  __ Mov(inputs_d_base, inputs_d);
1638
1639  __ Mov(inputs_n_base, inputs_n);
1640  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1641  __ Mov(inputs_m_base, inputs_m);
1642  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1643
1644  __ Ldr(vd, MemOperand(inputs_d_base));
1645  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1646  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1647
1648  __ Mov(index_n, 0);
1649  __ Bind(&loop_n);
1650
1651  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1652                                  vn_lane_bytes_log2));
1653  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1654
1655  __ Mov(index_m, 0);
1656  __ Bind(&loop_m);
1657
1658  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1659                                  vm_lane_bytes_log2));
1660  __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1661
1662  __ Mov(vres, vd);
1663  {
1664    SingleEmissionCheckScope guard(&masm);
1665    (masm.*helper)(vres_helper, vn_helper, vm_helper);
1666  }
1667  __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1668
1669  __ Add(index_m, index_m, 1);
1670  __ Cmp(index_m, inputs_m_length);
1671  __ B(lo, &loop_m);
1672
1673  __ Add(index_n, index_n, 1);
1674  __ Cmp(index_n, inputs_n_length);
1675  __ B(lo, &loop_n);
1676
1677  END();
1678  RUN();
1679  TEARDOWN();
1680}
1681
1682
1683// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1684// arrays of rawbit representation of input values. This ensures that
1685// exact bit comparisons can be performed.
1686template <typename Td, typename Tn, typename Tm>
1687static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper,
1688                        const Td inputs_d[],
1689                        const Tn inputs_n[], unsigned inputs_n_length,
1690                        const Tm inputs_m[], unsigned inputs_m_length,
1691                        const Td expected[], unsigned expected_length,
1692                        VectorFormat vd_form,
1693                        VectorFormat vn_form,
1694                        VectorFormat vm_form) {
1695  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
1696
1697  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1698
1699  const unsigned results_length = inputs_n_length * inputs_m_length;
1700  Td* results = new Td[results_length * vd_lane_count];
1701  const unsigned lane_bit = sizeof(Td) * 8;
1702  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1703
1704  Test2OpNEON_Helper(helper,
1705                     reinterpret_cast<uintptr_t>(inputs_d),
1706                     reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1707                     reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1708                     reinterpret_cast<uintptr_t>(results),
1709                     vd_form, vn_form, vm_form);
1710
1711  if (Test::sim_test_trace()) {
1712    // Print the results.
1713    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1714    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1715      printf(" ");
1716      // Output a separate result for each element of the result vector.
1717      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1718        unsigned index = lane + (iteration * vd_lane_count);
1719        printf(" 0x%0*" PRIx64 ",",
1720               lane_len_in_hex,
1721               static_cast<uint64_t>(results[index]));
1722      }
1723      printf("\n");
1724    }
1725
1726    printf("};\n");
1727    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1728           name,
1729           results_length);
1730  } else {
1731    // Check the results.
1732    VIXL_CHECK(expected_length == results_length);
1733    unsigned error_count = 0;
1734    unsigned d = 0;
1735    const char* padding = "                    ";
1736    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1737    for (unsigned n = 0; n < inputs_n_length; n++) {
1738      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1739        bool error_in_vector = false;
1740
1741        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1742          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1743              (m * vd_lane_count) + lane;
1744
1745          if (results[output_index] != expected[output_index]) {
1746            error_in_vector = true;
1747            break;
1748          }
1749        }
1750
1751        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1752          printf("%s\n", name);
1753          printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
1754                 lane_len_in_hex+1, padding,
1755                 lane_len_in_hex+1, padding,
1756                 lane_len_in_hex+1, padding,
1757                 lane_len_in_hex+1, padding);
1758
1759          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1760            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1761                (m * vd_lane_count) + lane;
1762            unsigned input_index_n = (inputs_n_length - vd_lane_count +
1763                n + 1 + lane) % inputs_n_length;
1764            unsigned input_index_m = (inputs_m_length - vd_lane_count +
1765                m + 1 + lane) % inputs_m_length;
1766
1767            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
1768                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1769                   results[output_index] != expected[output_index] ? '*' : ' ',
1770                   lane_len_in_hex,
1771                   static_cast<uint64_t>(inputs_d[lane]),
1772                   lane_len_in_hex,
1773                   static_cast<uint64_t>(inputs_n[input_index_n]),
1774                   lane_len_in_hex,
1775                   static_cast<uint64_t>(inputs_m[input_index_m]),
1776                   lane_len_in_hex,
1777                   static_cast<uint64_t>(results[output_index]),
1778                   lane_len_in_hex,
1779                   static_cast<uint64_t>(expected[output_index]));
1780          }
1781        }
1782      }
1783    }
1784    VIXL_ASSERT(d == expected_length);
1785    if (error_count > kErrorReportLimit) {
1786      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1787    }
1788    VIXL_CHECK(error_count == 0);
1789  }
1790  delete[] results;
1791}
1792
1793
1794// ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
1795
1796static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
1797                                     uintptr_t inputs_d,
1798                                     uintptr_t inputs_n,
1799                                     unsigned inputs_n_length,
1800                                     uintptr_t inputs_m,
1801                                     unsigned inputs_m_length,
1802                                     const int indices[],
1803                                     unsigned indices_length,
1804                                     uintptr_t results,
1805                                     VectorFormat vd_form,
1806                                     VectorFormat vn_form,
1807                                     VectorFormat vm_form) {
1808  VIXL_ASSERT(vd_form != kFormatUndefined);
1809  VIXL_ASSERT(vn_form != kFormatUndefined);
1810  VIXL_ASSERT(vm_form != kFormatUndefined);
1811
1812  SETUP();
1813  START();
1814
1815  // Roll up the loop to keep the code size down.
1816  Label loop_n, loop_m;
1817
1818  Register out = x0;
1819  Register inputs_n_base = x1;
1820  Register inputs_m_base = x2;
1821  Register inputs_d_base = x3;
1822  Register inputs_n_last_16bytes = x4;
1823  Register inputs_m_last_16bytes = x5;
1824  Register index_n = x6;
1825  Register index_m = x7;
1826
1827  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1828  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1829  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1830
1831  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1832  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1833  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1834  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1835  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1836
1837  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1838  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1839  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1840  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1841  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1842
1843
1844  // Always load and store 128 bits regardless of the format.
1845  VRegister vd = v0.V16B();
1846  VRegister vn = v1.V16B();
1847  VRegister vm = v2.V16B();
1848  VRegister vntmp = v3.V16B();
1849  VRegister vmtmp = v4.V16B();
1850  VRegister vres = v5.V16B();
1851
1852  // These will have the correct format for calling the 'helper'.
1853  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1854  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1855  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1856
1857  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1858  VRegister vntmp_single = VRegister(3, vn_lane_bits);
1859  VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1860
1861  __ Mov(out, results);
1862
1863  __ Mov(inputs_d_base, inputs_d);
1864
1865  __ Mov(inputs_n_base, inputs_n);
1866  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1867  __ Mov(inputs_m_base, inputs_m);
1868  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1869
1870  __ Ldr(vd, MemOperand(inputs_d_base));
1871  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1872  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1873
1874  __ Mov(index_n, 0);
1875  __ Bind(&loop_n);
1876
1877  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
1878                                  vn_lane_bytes_log2));
1879  __ Ext(vn, vn, vntmp, vn_lane_bytes);
1880
1881  __ Mov(index_m, 0);
1882  __ Bind(&loop_m);
1883
1884  __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL,
1885                                  vm_lane_bytes_log2));
1886  __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1887
1888  __ Mov(vres, vd);
1889  {
1890    for (unsigned i = 0; i < indices_length; i++) {
1891      {
1892        SingleEmissionCheckScope guard(&masm);
1893        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
1894      }
1895      __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1896    }
1897  }
1898
1899  __ Add(index_m, index_m, 1);
1900  __ Cmp(index_m, inputs_m_length);
1901  __ B(lo, &loop_m);
1902
1903  __ Add(index_n, index_n, 1);
1904  __ Cmp(index_n, inputs_n_length);
1905  __ B(lo, &loop_n);
1906
1907  END();
1908  RUN();
1909  TEARDOWN();
1910}
1911
1912
1913
1914// Test NEON instructions. The inputs_*[] and expected[] arrays should be
1915// arrays of rawbit representation of input values. This ensures that
1916// exact bit comparisons can be performed.
1917template <typename Td, typename Tn, typename Tm>
1918static void TestByElementNEON(const char *name,
1919                              TestByElementNEONHelper_t helper,
1920                              const Td inputs_d[],
1921                              const Tn inputs_n[], unsigned inputs_n_length,
1922                              const Tm inputs_m[], unsigned inputs_m_length,
1923                              const int indices[], unsigned indices_length,
1924                              const Td expected[], unsigned expected_length,
1925                              VectorFormat vd_form,
1926                              VectorFormat vn_form,
1927                              VectorFormat vm_form) {
1928  VIXL_ASSERT(inputs_n_length > 0);
1929  VIXL_ASSERT(inputs_m_length > 0);
1930  VIXL_ASSERT(indices_length > 0);
1931
1932  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1933
1934  const unsigned results_length = inputs_n_length * inputs_m_length *
1935                                  indices_length;
1936  Td* results = new Td[results_length * vd_lane_count];
1937  const unsigned lane_bit = sizeof(Td) * 8;
1938  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1939
1940  TestByElementNEON_Helper(helper,
1941    reinterpret_cast<uintptr_t>(inputs_d),
1942    reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
1943    reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
1944    indices, indices_length,
1945    reinterpret_cast<uintptr_t>(results),
1946    vd_form, vn_form, vm_form);
1947
1948  if (Test::sim_test_trace()) {
1949    // Print the results.
1950    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1951    for (unsigned iteration = 0; iteration < results_length; iteration++) {
1952      printf(" ");
1953      // Output a separate result for each element of the result vector.
1954      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1955        unsigned index = lane + (iteration * vd_lane_count);
1956        printf(" 0x%0*" PRIx64 ",",
1957               lane_len_in_hex,
1958               static_cast<uint64_t>(results[index]));
1959      }
1960      printf("\n");
1961    }
1962
1963    printf("};\n");
1964    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1965           name,
1966           results_length);
1967  } else {
1968    // Check the results.
1969    VIXL_CHECK(expected_length == results_length);
1970    unsigned error_count = 0;
1971    unsigned d = 0;
1972    const char* padding = "                    ";
1973    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1974    for (unsigned n = 0; n < inputs_n_length; n++) {
1975      for (unsigned m = 0; m < inputs_m_length; m++) {
1976        for (unsigned index = 0; index < indices_length; index++, d++) {
1977          bool error_in_vector = false;
1978
1979          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1980            unsigned output_index =
1981                (n * inputs_m_length * indices_length * vd_lane_count) +
1982                (m * indices_length * vd_lane_count) +
1983                (index * vd_lane_count) + lane;
1984
1985            if (results[output_index] != expected[output_index]) {
1986              error_in_vector = true;
1987              break;
1988            }
1989          }
1990
1991          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1992            printf("%s\n", name);
1993            printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
1994                  lane_len_in_hex+1, padding,
1995                  lane_len_in_hex+1, padding,
1996                  lane_len_in_hex+1, padding,
1997                  lane_len_in_hex+1, padding);
1998
1999            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2000              unsigned output_index =
2001                  (n * inputs_m_length * indices_length * vd_lane_count) +
2002                  (m * indices_length * vd_lane_count) +
2003                  (index * vd_lane_count) + lane;
2004              unsigned input_index_n = (inputs_n_length - vd_lane_count +
2005                  n + 1 + lane) % inputs_n_length;
2006              unsigned input_index_m = (inputs_m_length - vd_lane_count +
2007                  m + 1 + lane) % inputs_m_length;
2008
2009              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2010                "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2011                results[output_index] != expected[output_index] ? '*' : ' ',
2012                lane_len_in_hex,
2013                static_cast<uint64_t>(inputs_d[lane]),
2014                lane_len_in_hex,
2015                static_cast<uint64_t>(inputs_n[input_index_n]),
2016                lane_len_in_hex,
2017                static_cast<uint64_t>(inputs_m[input_index_m]),
2018                indices[index],
2019                lane_len_in_hex,
2020                static_cast<uint64_t>(results[output_index]),
2021                lane_len_in_hex,
2022                static_cast<uint64_t>(expected[output_index]));
2023            }
2024          }
2025        }
2026      }
2027    }
2028    VIXL_ASSERT(d == expected_length);
2029    if (error_count > kErrorReportLimit) {
2030      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2031    }
2032    VIXL_CHECK(error_count == 0);
2033  }
2034  delete[] results;
2035}
2036
2037
2038// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2039
2040
2041template <typename Tm>
2042void Test2OpImmNEON_Helper(
2043    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2044    uintptr_t inputs_n,
2045    unsigned inputs_n_length,
2046    const Tm inputs_m[],
2047    unsigned inputs_m_length,
2048    uintptr_t results,
2049    VectorFormat vd_form,
2050    VectorFormat vn_form) {
2051  VIXL_ASSERT(vd_form != kFormatUndefined &&
2052              vn_form != kFormatUndefined);
2053
2054  SETUP();
2055  START();
2056
2057  // Roll up the loop to keep the code size down.
2058  Label loop_n;
2059
2060  Register out = x0;
2061  Register inputs_n_base = x1;
2062  Register inputs_n_last_16bytes = x3;
2063  Register index_n = x5;
2064
2065  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2066  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2067  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2068
2069  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2070  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2071  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2072  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2073  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2074
2075
2076  // These will be either a D- or a Q-register form, with a single lane
2077  // (for use in scalar load and store operations).
2078  VRegister vd = VRegister(0, vd_bits);
2079  VRegister vn = v1.V16B();
2080  VRegister vntmp = v3.V16B();
2081
2082  // These will have the correct format for use when calling 'helper'.
2083  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2084  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2085
2086  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2087  VRegister vntmp_single = VRegister(3, vn_lane_bits);
2088
2089  __ Mov(out, results);
2090
2091  __ Mov(inputs_n_base, inputs_n);
2092  __ Mov(inputs_n_last_16bytes,
2093         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2094
2095  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2096
2097  __ Mov(index_n, 0);
2098  __ Bind(&loop_n);
2099
2100  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2101                                  vn_lane_bytes_log2));
2102  __ Ext(vn, vn, vntmp, vn_lane_bytes);
2103
2104  // Set the destination to zero for tests such as '[r]shrn2'.
2105  // TODO: Setting the destination to values other than zero might be a better
2106  //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2107  __ Movi(vd.V16B(), 0);
2108
2109  {
2110    for (unsigned i = 0; i < inputs_m_length; i++) {
2111      {
2112        SingleEmissionCheckScope guard(&masm);
2113        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2114      }
2115      __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
2116    }
2117  }
2118
2119  __ Add(index_n, index_n, 1);
2120  __ Cmp(index_n, inputs_n_length);
2121  __ B(lo, &loop_n);
2122
2123  END();
2124  RUN();
2125  TEARDOWN();
2126}
2127
2128
2129// Test NEON instructions. The inputs_*[] and expected[] arrays should be
2130// arrays of rawbit representation of input values. This ensures that
2131// exact bit comparisons can be performed.
2132template <typename Td, typename Tn, typename Tm>
2133static void Test2OpImmNEON(
2134    const char * name,
2135    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2136    const Tn inputs_n[], unsigned inputs_n_length,
2137    const Tm inputs_m[], unsigned inputs_m_length,
2138    const Td expected[], unsigned expected_length,
2139    VectorFormat vd_form,
2140    VectorFormat vn_form) {
2141  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2142
2143  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2144  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2145  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2146
2147  const unsigned results_length = inputs_n_length * inputs_m_length;
2148  Td* results = new Td[results_length * vd_lane_count];
2149  const unsigned lane_bit = sizeof(Td) * 8;
2150  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2151
2152  Test2OpImmNEON_Helper(helper,
2153                        reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
2154                        inputs_m, inputs_m_length,
2155                        reinterpret_cast<uintptr_t>(results),
2156                        vd_form, vn_form);
2157
2158  if (Test::sim_test_trace()) {
2159    // Print the results.
2160    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2161    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2162      printf(" ");
2163      // Output a separate result for each element of the result vector.
2164      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2165        unsigned index = lane + (iteration * vd_lane_count);
2166        printf(" 0x%0*" PRIx64 ",",
2167               lane_len_in_hex,
2168               static_cast<uint64_t>(results[index]));
2169      }
2170      printf("\n");
2171    }
2172
2173    printf("};\n");
2174    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2175           name,
2176           results_length);
2177  } else {
2178    // Check the results.
2179    VIXL_CHECK(expected_length == results_length);
2180    unsigned error_count = 0;
2181    unsigned d = 0;
2182    const char* padding = "                    ";
2183    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2184    for (unsigned n = 0; n < inputs_n_length; n++) {
2185      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2186        bool error_in_vector = false;
2187
2188        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2189          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2190              (m * vd_lane_count) + lane;
2191
2192          if (results[output_index] != expected[output_index]) {
2193            error_in_vector = true;
2194            break;
2195          }
2196        }
2197
2198        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2199          printf("%s\n", name);
2200          printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2201                 lane_len_in_hex+1, padding,
2202                 lane_len_in_hex, padding,
2203                 lane_len_in_hex+1, padding);
2204
2205        const unsigned first_index_n =
2206          inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2207
2208        for (unsigned lane = 0;
2209             lane < std::max(vd_lane_count, vn_lane_count);
2210             lane++) {
2211            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2212                (m * vd_lane_count) + lane;
2213            unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2214            unsigned input_index_m = m;
2215
2216            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2217                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2218                   results[output_index] != expected[output_index] ? '*' : ' ',
2219                   lane_len_in_hex,
2220                   static_cast<uint64_t>(inputs_n[input_index_n]),
2221                   lane_len_in_hex,
2222                   static_cast<uint64_t>(inputs_m[input_index_m]),
2223                   lane_len_in_hex,
2224                   static_cast<uint64_t>(results[output_index]),
2225                   lane_len_in_hex,
2226                   static_cast<uint64_t>(expected[output_index]));
2227          }
2228        }
2229      }
2230    }
2231    VIXL_ASSERT(d == expected_length);
2232    if (error_count > kErrorReportLimit) {
2233      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2234    }
2235    VIXL_CHECK(error_count == 0);
2236  }
2237  delete[] results;
2238}
2239
2240
2241// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2242
2243
2244static void TestOpImmOpImmNEON_Helper(
2245  TestOpImmOpImmVdUpdateNEONHelper_t helper,
2246  uintptr_t inputs_d,
2247  const int inputs_imm1[], unsigned inputs_imm1_length,
2248  uintptr_t inputs_n, unsigned inputs_n_length,
2249  const int inputs_imm2[], unsigned inputs_imm2_length,
2250  uintptr_t results,
2251  VectorFormat vd_form, VectorFormat vn_form) {
2252  VIXL_ASSERT(vd_form != kFormatUndefined);
2253  VIXL_ASSERT(vn_form != kFormatUndefined);
2254
2255  SETUP();
2256  START();
2257
2258  // Roll up the loop to keep the code size down.
2259  Label loop_n;
2260
2261  Register out = x0;
2262  Register inputs_d_base = x1;
2263  Register inputs_n_base = x2;
2264  Register inputs_n_last_vector = x4;
2265  Register index_n = x6;
2266
2267  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2268  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2269  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2270
2271  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2272  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2273  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2274  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2275  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2276
2277
2278  // These will be either a D- or a Q-register form, with a single lane
2279  // (for use in scalar load and store operations).
2280  VRegister vd = VRegister(0, vd_bits);
2281  VRegister vn = VRegister(1, vn_bits);
2282  VRegister vntmp = VRegister(4, vn_bits);
2283  VRegister vres = VRegister(5, vn_bits);
2284
2285  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2286  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2287
2288  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2289  VRegister vntmp_single = VRegister(4, vn_lane_bits);
2290
2291  // Same registers for use in the 'ext' instructions.
2292  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2293  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2294
2295  __ Mov(out, results);
2296
2297  __ Mov(inputs_d_base, inputs_d);
2298
2299  __ Mov(inputs_n_base, inputs_n);
2300  __ Mov(inputs_n_last_vector,
2301         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2302
2303  __ Ldr(vd, MemOperand(inputs_d_base));
2304
2305  __ Ldr(vn, MemOperand(inputs_n_last_vector));
2306
2307  __ Mov(index_n, 0);
2308  __ Bind(&loop_n);
2309
2310  __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL,
2311                                  vn_lane_bytes_log2));
2312  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2313
2314  {
2315    EmissionCheckScope guard(&masm,
2316        kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3);
2317    for (unsigned i = 0; i < inputs_imm1_length; i++) {
2318      for (unsigned j = 0; j < inputs_imm2_length; j++) {
2319        __ Mov(vres, vd);
2320        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2321        __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
2322      }
2323    }
2324  }
2325
2326  __ Add(index_n, index_n, 1);
2327  __ Cmp(index_n, inputs_n_length);
2328  __ B(lo, &loop_n);
2329
2330  END();
2331  RUN();
2332  TEARDOWN();
2333}
2334
2335
2336// Test NEON instructions. The inputs_*[] and expected[] arrays should be
2337// arrays of rawbit representation of input values. This ensures that
2338// exact bit comparisons can be performed.
2339template <typename Td, typename Tn>
2340static void TestOpImmOpImmNEON(const char * name,
2341                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
2342                               const Td inputs_d[],
2343                               const int inputs_imm1[],
2344                               unsigned inputs_imm1_length,
2345                               const Tn inputs_n[],
2346                               unsigned inputs_n_length,
2347                               const int inputs_imm2[],
2348                               unsigned inputs_imm2_length,
2349                               const Td expected[],
2350                               unsigned expected_length,
2351                               VectorFormat vd_form,
2352                               VectorFormat vn_form) {
2353  VIXL_ASSERT(inputs_n_length > 0);
2354  VIXL_ASSERT(inputs_imm1_length > 0);
2355  VIXL_ASSERT(inputs_imm2_length > 0);
2356
2357  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2358
2359  const unsigned results_length = inputs_n_length *
2360      inputs_imm1_length * inputs_imm2_length;
2361
2362  Td* results = new Td[results_length * vd_lane_count];
2363  const unsigned lane_bit = sizeof(Td) * 8;
2364  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2365
2366  TestOpImmOpImmNEON_Helper(helper,
2367                            reinterpret_cast<uintptr_t>(inputs_d),
2368                            inputs_imm1,
2369                            inputs_imm1_length,
2370                            reinterpret_cast<uintptr_t>(inputs_n),
2371                            inputs_n_length,
2372                            inputs_imm2,
2373                            inputs_imm2_length,
2374                            reinterpret_cast<uintptr_t>(results),
2375                            vd_form, vn_form);
2376
2377  if (Test::sim_test_trace()) {
2378    // Print the results.
2379    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2380    for (unsigned iteration = 0; iteration < results_length; iteration++) {
2381      printf(" ");
2382      // Output a separate result for each element of the result vector.
2383      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2384        unsigned index = lane + (iteration * vd_lane_count);
2385        printf(" 0x%0*" PRIx64 ",",
2386               lane_len_in_hex,
2387               static_cast<uint64_t>(results[index]));
2388      }
2389      printf("\n");
2390    }
2391
2392    printf("};\n");
2393    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2394           name,
2395           results_length);
2396  } else {
2397    // Check the results.
2398    VIXL_CHECK(expected_length == results_length);
2399    unsigned error_count = 0;
2400    unsigned counted_length = 0;
2401    const char* padding = "                    ";
2402    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2403    for (unsigned n = 0; n < inputs_n_length; n++) {
2404      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2405        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2406          bool error_in_vector = false;
2407
2408          counted_length++;
2409
2410          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2411            unsigned output_index =
2412                (n * inputs_imm1_length *
2413                 inputs_imm2_length * vd_lane_count) +
2414                (imm1 * inputs_imm2_length * vd_lane_count) +
2415                (imm2 * vd_lane_count) + lane;
2416
2417            if (results[output_index] != expected[output_index]) {
2418              error_in_vector = true;
2419              break;
2420            }
2421          }
2422
2423          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2424            printf("%s\n", name);
2425            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2426                   lane_len_in_hex+1, padding,
2427                   lane_len_in_hex, padding,
2428                   lane_len_in_hex+1, padding,
2429                   lane_len_in_hex, padding,
2430                   lane_len_in_hex+1, padding);
2431
2432            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2433              unsigned output_index =
2434                (n * inputs_imm1_length *
2435                 inputs_imm2_length * vd_lane_count) +
2436                (imm1 * inputs_imm2_length * vd_lane_count) +
2437                (imm2 * vd_lane_count) + lane;
2438              unsigned input_index_n = (inputs_n_length - vd_lane_count +
2439                  n + 1 + lane) % inputs_n_length;
2440              unsigned input_index_imm1 = imm1;
2441              unsigned input_index_imm2 = imm2;
2442
2443              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " "
2444                "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2445                results[output_index] !=
2446                  expected[output_index] ? '*' : ' ',
2447                lane_len_in_hex,
2448                static_cast<uint64_t>(inputs_d[lane]),
2449                lane_len_in_hex,
2450                static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2451                lane_len_in_hex,
2452                static_cast<uint64_t>(inputs_n[input_index_n]),
2453                lane_len_in_hex,
2454                static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2455                lane_len_in_hex,
2456                static_cast<uint64_t>(results[output_index]),
2457                lane_len_in_hex,
2458                static_cast<uint64_t>(expected[output_index]));
2459            }
2460          }
2461        }
2462      }
2463    }
2464    VIXL_ASSERT(counted_length == expected_length);
2465    if (error_count > kErrorReportLimit) {
2466      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2467    }
2468    VIXL_CHECK(error_count == 0);
2469  }
2470  delete[] results;
2471}
2472
2473
2474// ==== Floating-point tests. ====
2475
2476
// Standard floating-point test expansion for both double- and single-precision
// operations.
//
// STRINGIFY turns its argument into a string literal. The helper macros below
// concatenate several such literals to build the name handed to each test.
#define STRINGIFY(token) #token
2480
// Expands to a single call of Test<kind>() for one floating-point variant.
//   insn   - MacroAssembler member under test (passed as &MacroAssembler::insn)
//            and first half of the "<insn>_<suffix>" test name.
//   suffix - variant tag; also selects kExpected_<insn>_<suffix> and
//            kExpectedCount_<insn>_<suffix>.
//   kind   - pasted onto "Test" to pick the helper function.
//   values - input array. Its element count is derived with sizeof, so it has
//            to be an actual array, not a pointer.
#define CALL_TEST_FP_HELPER(insn, suffix, kind, values) \
  Test##kind(STRINGIFY(insn) "_" STRINGIFY(suffix), \
             &MacroAssembler::insn, \
             values, \
             (sizeof(values) / sizeof(values[0])), \
             kExpected_##insn##_##suffix, \
             kExpectedCount_##insn##_##suffix)
2487
// Defines two tests, <insn>_d and <insn>_s. The first runs Test<kind> on
// kInputDouble<group>, the second on kInputFloat<group>.
#define DEFINE_TEST_FP(insn, kind, group) \
  TEST(insn##_d) { \
    CALL_TEST_FP_HELPER(insn, d, kind, kInputDouble##group); \
  } \
  TEST(insn##_s) { \
    CALL_TEST_FP_HELPER(insn, s, kind, kInputFloat##group); \
  }
2495
2496// TODO: Test with a newer version of valgrind.
2497//
2498// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
2500// memory.
//
// Each DEFINE_TEST_FP(mnemonic, type, input) line defines the tests
// <mnemonic>_d and <mnemonic>_s, which call Test<type> on the
// kInputDouble<input> and kInputFloat<input> lists respectively.
//
// Tests routed through Test3Op.
DEFINE_TEST_FP(fmadd, 3Op, Basic)
DEFINE_TEST_FP(fmsub, 3Op, Basic)
DEFINE_TEST_FP(fnmadd, 3Op, Basic)
DEFINE_TEST_FP(fnmsub, 3Op, Basic)

// Tests routed through Test2Op.
DEFINE_TEST_FP(fadd, 2Op, Basic)
DEFINE_TEST_FP(fdiv, 2Op, Basic)
DEFINE_TEST_FP(fmax, 2Op, Basic)
DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP(fmin, 2Op, Basic)
DEFINE_TEST_FP(fminnm, 2Op, Basic)
DEFINE_TEST_FP(fmul, 2Op, Basic)
DEFINE_TEST_FP(fsub, 2Op, Basic)
DEFINE_TEST_FP(fnmul, 2Op, Basic)

// Tests routed through Test1Op. The frint* tests read the Conversions input
// lists; the others read the Basic lists.
DEFINE_TEST_FP(fabs, 1Op, Basic)
DEFINE_TEST_FP(fmov, 1Op, Basic)
DEFINE_TEST_FP(fneg, 1Op, Basic)
DEFINE_TEST_FP(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frinta, 1Op, Conversions)
DEFINE_TEST_FP(frinti, 1Op, Conversions)
DEFINE_TEST_FP(frintm, 1Op, Conversions)
DEFINE_TEST_FP(frintn, 1Op, Conversions)
DEFINE_TEST_FP(frintp, 1Op, Conversions)
DEFINE_TEST_FP(frintx, 1Op, Conversions)
DEFINE_TEST_FP(frintz, 1Op, Conversions)
2527
2528TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
2529TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
2530TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
2531TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
2532
2533TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
2534TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2535
// Defines four tests per instruction: <insn>_xd, <insn>_xs, <insn>_wd and
// <insn>_ws. The *d tests read kInputDouble<group>; the *s tests read
// kInputFloat<group>. All of them call Test<kind>.
#define DEFINE_TEST_FP_TO_INT(insn, kind, group) \
  TEST(insn##_xd) { \
    CALL_TEST_FP_HELPER(insn, xd, kind, kInputDouble##group); \
  } \
  TEST(insn##_xs) { \
    CALL_TEST_FP_HELPER(insn, xs, kind, kInputFloat##group); \
  } \
  TEST(insn##_wd) { \
    CALL_TEST_FP_HELPER(insn, wd, kind, kInputDouble##group); \
  } \
  TEST(insn##_ws) { \
    CALL_TEST_FP_HELPER(insn, ws, kind, kInputFloat##group); \
  }
2549
// Each line defines the four tests <mnemonic>_xd, _xs, _wd and _ws over the
// Conversions input lists. Note that fcvtzs and fcvtzu are routed through
// TestFPToFixedS / TestFPToFixedU, whereas the others use TestFPToS /
// TestFPToU.
DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2558
2559// TODO: Scvtf-fixed-point
2560// TODO: Scvtf-integer
2561// TODO: Ucvtf-fixed-point
2562// TODO: Ucvtf-integer
2563
2564// TODO: Fccmp
2565// TODO: Fcsel
2566
2567
2568// ==== NEON Tests. ====
2569
// Expands to a Test1OpNEON() call.
//   insn  - MacroAssembler member under test; first half of the test name.
//   vd    - destination format tag: appended to the test name, used to select
//           kExpected_NEON_<insn>_<vd> / kExpectedCount_NEON_<insn>_<vd>, and
//           pasted onto kFormat.
//   vn    - source format tag, pasted onto kFormat.
//   src_n - input array (must be a real array; its length comes from sizeof).
#define CALL_TEST_NEON_HELPER_1Op(insn, vd, vn, src_n) \
  Test1OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd), \
              &MacroAssembler::insn, \
              src_n, \
              (sizeof(src_n) / sizeof(src_n[0])), \
              kExpected_NEON_##insn##_##vd, \
              kExpectedCount_NEON_##insn##_##vd, \
              kFormat##vd, \
              kFormat##vn)
2581
// Expands to a Test1OpAcrossNEON() call. Unlike the plain 1Op helper, both
// format tags take part in the test name ("<insn>_<vd>_<vn>") and in the
// kExpected_NEON_* / kExpectedCount_NEON_* symbol names.
//   src_n - input array (must be a real array; its length comes from sizeof).
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd, vn, src_n) \
  Test1OpAcrossNEON(STRINGIFY(insn) "_" STRINGIFY(vd) "_" STRINGIFY(vn), \
                    &MacroAssembler::insn, \
                    src_n, \
                    (sizeof(src_n) / sizeof(src_n[0])), \
                    kExpected_NEON_##insn##_##vd##_##vn, \
                    kExpectedCount_NEON_##insn##_##vd##_##vn, \
                    kFormat##vd, \
                    kFormat##vn)
2594
// Expands to a Test2OpNEON() call.
//   insn          - MacroAssembler member under test.
//   vd, vn, vm    - format tags, each pasted onto kFormat. Only vd appears in
//                   the test name and in the kExpected_NEON_* symbol names.
//   src_d         - forwarded as-is (no length is passed for it).
//   src_n, src_m  - input arrays; their lengths are computed with sizeof.
#define CALL_TEST_NEON_HELPER_2Op(insn, vd, vn, vm, src_d, src_n, src_m) \
  Test2OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd), \
              &MacroAssembler::insn, \
              src_d, \
              src_n, \
              (sizeof(src_n) / sizeof(src_n[0])), \
              src_m, \
              (sizeof(src_m) / sizeof(src_m[0])), \
              kExpected_NEON_##insn##_##vd, \
              kExpectedCount_NEON_##insn##_##vd, \
              kFormat##vd, \
              kFormat##vn, \
              kFormat##vm)
2610
// Expands to a Test2OpImmNEON() call. The test name and the expected-result
// symbols carry a "_2OPIMM" suffix after the destination format tag.
//   src_n, src_m - input arrays; their lengths are computed with sizeof.
#define CALL_TEST_NEON_HELPER_2OpImm(insn, vd, vn, src_n, src_m) \
  Test2OpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd) "_2OPIMM", \
                 &MacroAssembler::insn, \
                 src_n, \
                 (sizeof(src_n) / sizeof(src_n[0])), \
                 src_m, \
                 (sizeof(src_m) / sizeof(src_m[0])), \
                 kExpected_NEON_##insn##_##vd##_2OPIMM, \
                 kExpectedCount_NEON_##insn##_##vd##_2OPIMM, \
                 kFormat##vd, \
                 kFormat##vn)
2624
// Expands to a TestByElementNEON() call. All three format tags appear in the
// test name ("<insn>_<vd>_<vn>_<vm>") and in the expected-result symbols.
//   src_d               - forwarded as-is (no length is passed for it).
//   src_n, src_m, lanes - arrays whose lengths are computed with sizeof.
#define CALL_TEST_NEON_HELPER_ByElement(insn, \
                                        vd, vn, vm, \
                                        src_d, src_n, src_m, lanes) \
  TestByElementNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd) "_" STRINGIFY(vn) "_" STRINGIFY(vm), \
      &MacroAssembler::insn, \
      src_d, \
      src_n, \
      (sizeof(src_n) / sizeof(src_n[0])), \
      src_m, \
      (sizeof(src_m) / sizeof(src_m[0])), \
      lanes, \
      (sizeof(lanes) / sizeof(lanes[0])), \
      kExpected_NEON_##insn##_##vd##_##vn##_##vm, \
      kExpectedCount_NEON_##insn##_##vd##_##vn##_##vm, \
      kFormat##vd, \
      kFormat##vn, \
      kFormat##vm)
2643
// Expands to a TestOpImmOpImmNEON() call. Here the instruction is supplied
// through `emit`, which is forwarded untouched; `insn` is only used to build
// the test name and the expected-result symbol names.
//   src_d                       - forwarded as-is (no length is passed).
//   imm1_list, src_n, imm2_list - arrays whose lengths come from sizeof.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(emit, \
                                         insn, \
                                         vd, vn, \
                                         src_d, imm1_list, \
                                         src_n, imm2_list) \
  TestOpImmOpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd), \
                     emit, \
                     src_d, \
                     imm1_list, \
                     (sizeof(imm1_list) / sizeof(imm1_list[0])), \
                     src_n, \
                     (sizeof(src_n) / sizeof(src_n[0])), \
                     imm2_list, \
                     (sizeof(imm2_list) / sizeof(imm2_list[0])), \
                     kExpected_NEON_##insn##_##vd, \
                     kExpectedCount_NEON_##insn##_##vd, \
                     kFormat##vd, \
                     kFormat##vn)
2662
// One-operand NEON test where destination and source share the same format:
// forwards to the 1Op helper with `fmt` used for both format arguments.
#define CALL_TEST_NEON_HELPER_2SAME(insn, fmt, src) \
  CALL_TEST_NEON_HELPER_1Op(insn, fmt, fmt, src)
2667
// Defines <insn>_8B and <insn>_16B, both reading kInput8bits<group>.
#define DEFINE_TEST_NEON_2SAME_8B_16B(insn, group) \
  TEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8B, kInput8bits##group); \
  } \
  TEST(insn##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 16B, kInput8bits##group); \
  }
2675
// Defines <insn>_4H and <insn>_8H, both reading kInput16bits<group>.
#define DEFINE_TEST_NEON_2SAME_4H_8H(insn, group) \
  TEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4H, kInput16bits##group); \
  } \
  TEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8H, kInput16bits##group); \
  }
2683
// Defines <insn>_2S and <insn>_4S, both reading kInput32bits<group>.
#define DEFINE_TEST_NEON_2SAME_2S_4S(insn, group) \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInput32bits##group); \
  } \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInput32bits##group); \
  }
2691
// Byte and half-word formats only: 8B, 16B, 4H and 8H tests.
#define DEFINE_TEST_NEON_2SAME_BH(insn, group) \
  DEFINE_TEST_NEON_2SAME_8B_16B(insn, group) \
  DEFINE_TEST_NEON_2SAME_4H_8H(insn, group)
2695
// Every vector format except 2D: 8B, 16B, 4H, 8H, 2S and 4S tests, in that
// order.
#define DEFINE_TEST_NEON_2SAME_NO2D(insn, group) \
  DEFINE_TEST_NEON_2SAME_8B_16B(insn, group) \
  DEFINE_TEST_NEON_2SAME_4H_8H(insn, group) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, group)
2699
// All vector formats: everything from the NO2D set plus <insn>_2D, which reads
// kInput64bits<group>.
#define DEFINE_TEST_NEON_2SAME(insn, group) \
  DEFINE_TEST_NEON_2SAME_NO2D(insn, group) \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##group); \
  }
// Word and double-word vector formats only: 2S, 4S and 2D tests. The 2D test
// reads kInput64bits<group>.
#define DEFINE_TEST_NEON_2SAME_SD(insn, group) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, group) \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##group); \
  }
2710
// Floating-point vector variants: <insn>_2S and <insn>_4S read
// kInputFloat<group>, <insn>_2D reads kInputDouble<group>.
#define DEFINE_TEST_NEON_2SAME_FP(insn, group) \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInputFloat##group); \
  } \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInputFloat##group); \
  } \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInputDouble##group); \
  }
2721
// Floating-point scalar variants: <insn>_S reads kInputFloat<group>,
// <insn>_D reads kInputDouble<group>.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(insn, group) \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInputFloat##group); \
  } \
  TEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInputDouble##group); \
  }
2729
// Scalar B-format test <insn>_B over kInput8bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, group) \
  TEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, B, kInput8bits##group); \
  }
// Scalar H-format test <insn>_H over kInput16bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, group) \
  TEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, H, kInput16bits##group); \
  }
// Scalar S-format test <insn>_S over kInput32bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, group) \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInput32bits##group); \
  }
// Scalar D-format test <insn>_D over kInput64bits<group>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, group) \
  TEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInput64bits##group); \
  }
2746
// All four scalar formats, in the order B, H, S, D. The S and D tests come
// from DEFINE_TEST_NEON_2SAME_SCALAR_SD.
#define DEFINE_TEST_NEON_2SAME_SCALAR(insn, group) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, group) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, group) \
  DEFINE_TEST_NEON_2SAME_SCALAR_SD(insn, group)
2752
// Scalar S and D formats only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(insn, group) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, group) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, group)
2756
2757
// Alias for the 1OpAcross helper; all arguments are forwarded unchanged.
#define CALL_TEST_NEON_HELPER_ACROSS(insn, vd, vn, src_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(insn, vd, vn, src_n)
2762
// Across-lanes tests whose scalar result has the same lane size as the source
// vector. Test names are <insn>_<vd>_<vn>:
//   B_8B, B_16B - kInput8bits<group>
//   H_4H, H_8H  - kInput16bits<group>
//   S_4S        - kInput32bits<group>
#define DEFINE_TEST_NEON_ACROSS(insn, group) \
  TEST(insn##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 8B, kInput8bits##group); \
  } \
  TEST(insn##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 16B, kInput8bits##group); \
  } \
  TEST(insn##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 4H, kInput16bits##group); \
  } \
  TEST(insn##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8H, kInput16bits##group); \
  } \
  TEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInput32bits##group); \
  }
2779
// Across-lanes tests whose scalar destination format is the next size up from
// the source lanes. Test names are <insn>_<vd>_<vn>:
//   H_8B, H_16B - kInput8bits<group>
//   S_4H, S_8H  - kInput16bits<group>
//   D_4S        - kInput32bits<group>
#define DEFINE_TEST_NEON_ACROSS_LONG(insn, group) \
  TEST(insn##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8B, kInput8bits##group); \
  } \
  TEST(insn##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 16B, kInput8bits##group); \
  } \
  TEST(insn##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4H, kInput16bits##group); \
  } \
  TEST(insn##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 8H, kInput16bits##group); \
  } \
  TEST(insn##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, D, 4S, kInput32bits##group); \
  }
2796
// Floating-point across-lanes test: only <insn>_S_4S, over kInputFloat<group>.
#define DEFINE_TEST_NEON_ACROSS_FP(insn, group) \
  TEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInputFloat##group); \
  }
2801
// One-operand NEON test where destination and source formats differ: forwards
// all arguments unchanged to the 1Op helper.
#define CALL_TEST_NEON_HELPER_2DIFF(insn, vd, vn, src_n) \
  CALL_TEST_NEON_HELPER_1Op(insn, vd, vn, src_n)
2808
// Tests named after the destination format, with (vd <- vn) pairs:
//   4H <- 8B, 8H <- 16B - kInput8bits<group>
//   2S <- 4H, 4S <- 8H  - kInput16bits<group>
//   1D <- 2S, 2D <- 4S  - kInput32bits<group>
#define DEFINE_TEST_NEON_2DIFF_LONG(insn, group) \
  TEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 8B, kInput8bits##group); \
  } \
  TEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8H, 16B, kInput8bits##group); \
  } \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 4H, kInput16bits##group); \
  } \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 8H, kInput16bits##group); \
  } \
  TEST(insn##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 1D, 2S, kInput32bits##group); \
  } \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 4S, kInput32bits##group); \
  }
2828
// Narrowing tests, covering both <insn> and its "<insn>2" counterpart (the
// latter is formed by pasting a literal 2 onto the mnemonic). Pairs are
// (vd <- vn):
//   <insn>_8B   : 8B  <- 8H, kInput16bits<group>
//   <insn>_4H   : 4H  <- 4S, kInput32bits<group>
//   <insn>_2S   : 2S  <- 2D, kInput64bits<group>
//   <insn>2_16B : 16B <- 8H, kInput16bits<group>
//   <insn>2_8H  : 8H  <- 4S, kInput32bits<group>
//   <insn>2_4S  : 4S  <- 2D, kInput64bits<group>
#define DEFINE_TEST_NEON_2DIFF_NARROW(insn, group) \
  TEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8B, 8H, kInput16bits##group); \
  } \
  TEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInput32bits##group); \
  } \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInput64bits##group); \
  } \
  TEST(insn##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 16B, 8H, kInput16bits##group); \
  } \
  TEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInput32bits##group); \
  } \
  TEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInput64bits##group); \
  }
2848
// Floating-point tests for <insn> and "<insn>2", with (vd <- vn) pairs:
//   <insn>_4S  : 4S <- 4H, kInputFloat16<group>
//   <insn>_2D  : 2D <- 2S, kInputFloat<group>
//   <insn>2_4S : 4S <- 8H, kInputFloat16<group>
//   <insn>2_2D : 2D <- 4S, kInputFloat<group>
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(insn, group) \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 4H, kInputFloat16##group); \
  } \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 2S, kInputFloat##group); \
  } \
  TEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 8H, kInputFloat16##group); \
  } \
  TEST(insn##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 2D, 4S, kInputFloat##group); \
  }
2862
// Floating-point narrowing tests for <insn> and "<insn>2", with (vd <- vn):
//   <insn>_4H  : 4H <- 4S, kInputFloat<group>
//   <insn>_2S  : 2S <- 2D, kInputDouble<group>
//   <insn>2_8H : 8H <- 4S, kInputFloat<group>
//   <insn>2_4S : 4S <- 2D, kInputDouble<group>
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(insn, group) \
  TEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInputFloat##group); \
  } \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInputDouble##group); \
  } \
  TEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInputFloat##group); \
  } \
  TEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInputDouble##group); \
  }
2876
// Subset of the FP narrowing tests that only takes a 2D source:
//   <insn>_2S  : 2S <- 2D, kInputDouble<group>
//   <insn>2_4S : 4S <- 2D, kInputDouble<group>
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(insn, group) \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInputDouble##group); \
  } \
  TEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInputDouble##group); \
  }
2884
// Scalar narrowing tests, with (vd <- vn) pairs:
//   <insn>_B : B <- H, kInput16bits<group>
//   <insn>_H : H <- S, kInput32bits<group>
//   <insn>_S : S <- D, kInput64bits<group>
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(insn, group) \
  TEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, B, H, kInput16bits##group); \
  } \
  TEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, H, S, kInput32bits##group); \
  } \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, S, D, kInput64bits##group); \
  }
2895
// Floating-point tests with a scalar destination and a two-lane vector source:
//   <insn>_S : S <- 2S, kInputFloat<group>
//   <insn>_D : D <- 2D, kInputDouble<group>
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(insn, group) \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, S, 2S, kInputFloat##group); \
  } \
  TEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, D, 2D, kInputDouble##group); \
  }
2903
// Two-operand NEON test in which Vd, Vn and Vm all use the same format and the
// same array supplies both the n and m inputs. Note that this expands to a
// complete braced block (the call already ends with its own semicolon), not
// to a bare expression like the 2SAME / 2DIFF helpers.
#define CALL_TEST_NEON_HELPER_3SAME(insn, fmt, src_d, src_nm) { \
    CALL_TEST_NEON_HELPER_2Op(insn, \
                              fmt, fmt, fmt, \
                              src_d, src_nm, src_nm); \
  }
2909
// Defines <mnemonic>_8B and <mnemonic>_16B three-same tests. Both use
// kInput8bitsAccDestination as the destination input and kInput8bits<input>
// for the n and m operands.
//
// The last line intentionally has no continuation backslash: a trailing '\'
// there would make the definition run on into whatever line follows, so it
// would only work as long as that line stayed blank.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)                       \
    TEST(mnemonic##_8B) {                                                    \
      CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B,                              \
                                  kInput8bitsAccDestination,                 \
                                  kInput8bits##input);                       \
    }                                                                        \
    TEST(mnemonic##_16B) {                                                   \
      CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B,                             \
                                  kInput8bitsAccDestination,                 \
                                  kInput8bits##input);                       \
    }
2921
// 3SAME tests for the halfword and word vector forms of `insn`. Emits
// TEST(<insn>_4H) and TEST(<insn>_8H) using kInput16bitsAccDestination with
// kInput16bits<inputs>, then TEST(<insn>_2S) and TEST(<insn>_4S) using
// kInput32bitsAccDestination with kInput32bits<inputs>.
#define DEFINE_TEST_NEON_3SAME_HS(insn, inputs) \
  TEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 4H, kInput16bitsAccDestination, kInput16bits##inputs); \
  } \
  TEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 8H, kInput16bitsAccDestination, kInput16bits##inputs); \
  } \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 2S, kInput32bitsAccDestination, kInput32bits##inputs); \
  } \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 4S, kInput32bitsAccDestination, kInput32bits##inputs); \
  }
2943
// 3SAME tests for every vector form of `insn` except 2D: the 8B/16B tests
// from DEFINE_TEST_NEON_3SAME_8B_16B followed by the 4H/8H/2S/4S tests from
// DEFINE_TEST_NEON_3SAME_HS.
#define DEFINE_TEST_NEON_3SAME_NO2D(insn, inputs) \
  DEFINE_TEST_NEON_3SAME_8B_16B(insn, inputs) \
  DEFINE_TEST_NEON_3SAME_HS(insn, inputs)
2947
// Full set of 3SAME vector tests for `insn`: everything produced by
// DEFINE_TEST_NEON_3SAME_NO2D, plus TEST(<insn>_2D) using
// kInput64bitsAccDestination with kInput64bits<inputs>.
#define DEFINE_TEST_NEON_3SAME(insn, inputs) \
  DEFINE_TEST_NEON_3SAME_NO2D(insn, inputs) \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 2D, kInput64bitsAccDestination, kInput64bits##inputs); \
  }
2955
// Floating-point 3SAME vector tests for `insn`. Emits TEST(<insn>_2S) and
// TEST(<insn>_4S) using kInputFloatAccDestination with kInputFloat<inputs>,
// then TEST(<insn>_2D) using kInputDoubleAccDestination with
// kInputDouble<inputs>.
#define DEFINE_TEST_NEON_3SAME_FP(insn, inputs) \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 2S, kInputFloatAccDestination, kInputFloat##inputs); \
  } \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 4S, kInputFloatAccDestination, kInputFloat##inputs); \
  } \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, 2D, kInputDoubleAccDestination, kInputDouble##inputs); \
  }
2972
// Scalar 3SAME test for the D form of `insn` only: emits TEST(<insn>_D) using
// kInput64bitsAccDestination with kInput64bits<inputs>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(insn, inputs) \
  TEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, D, kInput64bitsAccDestination, kInput64bits##inputs); \
  }
2979
// Scalar 3SAME tests for the H and S forms of `insn`. Emits TEST(<insn>_H)
// using kInput16bitsAccDestination with kInput16bits<inputs>, and
// TEST(<insn>_S) using kInput32bitsAccDestination with kInput32bits<inputs>.
//
// The last line of the definition carries no line continuation, so the macro
// ends at its closing brace regardless of what follows it in the file.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(insn, inputs) \
  TEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, H, kInput16bitsAccDestination, kInput16bits##inputs); \
  } \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, S, kInput32bitsAccDestination, kInput32bits##inputs); \
  }
2991
// Scalar 3SAME tests for all four scalar forms of `insn`. Emits, in order,
// TEST(<insn>_B), TEST(<insn>_H), TEST(<insn>_S) and TEST(<insn>_D); each
// uses the kInput<N>bitsAccDestination / kInput<N>bits<inputs> pair with N =
// 8, 16, 32 and 64 respectively.
#define DEFINE_TEST_NEON_3SAME_SCALAR(insn, inputs) \
  TEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, B, kInput8bitsAccDestination, kInput8bits##inputs); \
  } \
  TEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, H, kInput16bitsAccDestination, kInput16bits##inputs); \
  } \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, S, kInput32bitsAccDestination, kInput32bits##inputs); \
  } \
  TEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, D, kInput64bitsAccDestination, kInput64bits##inputs); \
  }
3013
// Floating-point scalar 3SAME tests for `insn`. Emits TEST(<insn>_S) using
// kInputFloatAccDestination with kInputFloat<inputs>, and TEST(<insn>_D)
// using kInputDoubleAccDestination with kInputDouble<inputs>.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(insn, inputs) \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, S, kInputFloatAccDestination, kInputFloat##inputs); \
  } \
  TEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        insn, D, kInputDoubleAccDestination, kInputDouble##inputs); \
  }
3025
// Pass-through to CALL_TEST_NEON_HELPER_2Op that lets each of the three form
// arguments (vd, vn, vm) and each of the three input arguments (in_d, in_n,
// in_m) differ. All seven arguments are forwarded unchanged inside a
// brace-enclosed block.
#define CALL_TEST_NEON_HELPER_3DIFF(insn, vd, vn, vm, in_d, in_n, in_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op(insn, vd, vn, vm, in_d, in_n, in_m); \
  }
3033
// Long 3DIFF tests with an 8H destination. Emits TEST(<insn>_8H) with forms
// (8H, 8B, 8B) and TEST(<insn>2_8H), which targets the `<insn>2` mnemonic
// with forms (8H, 16B, 16B). Both use kInput16bitsAccDestination and pass
// kInput8bits<inputs> for each of the two remaining input arguments.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(insn, inputs) \
  TEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 8H, 8B, 8B, kInput16bitsAccDestination, \
        kInput8bits##inputs, kInput8bits##inputs); \
  } \
  TEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 8H, 16B, 16B, kInput16bitsAccDestination, \
        kInput8bits##inputs, kInput8bits##inputs); \
  }
3045
// Long 3DIFF tests with a 4S destination. Emits TEST(<insn>_4S) with forms
// (4S, 4H, 4H) and TEST(<insn>2_4S), which targets the `<insn>2` mnemonic
// with forms (4S, 8H, 8H). Both use kInput32bitsAccDestination and pass
// kInput16bits<inputs> for each of the two remaining input arguments.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(insn, inputs) \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 4S, 4H, 4H, kInput32bitsAccDestination, \
        kInput16bits##inputs, kInput16bits##inputs); \
  } \
  TEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 4S, 8H, 8H, kInput32bitsAccDestination, \
        kInput16bits##inputs, kInput16bits##inputs); \
  }
3057
// Long 3DIFF tests with a 2D destination. Emits TEST(<insn>_2D) with forms
// (2D, 2S, 2S) and TEST(<insn>2_2D), which targets the `<insn>2` mnemonic
// with forms (2D, 4S, 4S). Both use kInput64bitsAccDestination and pass
// kInput32bits<inputs> for each of the two remaining input arguments.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(insn, inputs) \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 2D, 2S, 2S, kInput64bitsAccDestination, \
        kInput32bits##inputs, kInput32bits##inputs); \
  } \
  TEST(insn##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 2D, 4S, 4S, kInput64bitsAccDestination, \
        kInput32bits##inputs, kInput32bits##inputs); \
  }
3069
// Long 3DIFF tests restricted to the 4S and 2D destinations: the output of
// DEFINE_TEST_NEON_3DIFF_LONG_4S followed by DEFINE_TEST_NEON_3DIFF_LONG_2D.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(insn, inputs) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(insn, inputs) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(insn, inputs)
3073
// Long 3DIFF tests for every destination form. The 8H tests come from
// DEFINE_TEST_NEON_3DIFF_LONG_8H; the 4S and 2D tests come from
// DEFINE_TEST_NEON_3DIFF_LONG_SD (4S first, then 2D), so the emitted order is
// 8H, 4S, 2D.
#define DEFINE_TEST_NEON_3DIFF_LONG(insn, inputs) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(insn, inputs) \
  DEFINE_TEST_NEON_3DIFF_LONG_SD(insn, inputs)
3078
// Scalar long 3DIFF test with an S destination: emits TEST(<insn>_S) with
// forms (S, H, H), kInput32bitsAccDestination, and kInput16bits<inputs> for
// each of the two remaining input arguments.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(insn, inputs) \
  TEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, S, H, H, kInput32bitsAccDestination, \
        kInput16bits##inputs, kInput16bits##inputs); \
  }
3086
// Scalar long 3DIFF test with a D destination: emits TEST(<insn>_D) with
// forms (D, S, S), kInput64bitsAccDestination, and kInput32bits<inputs> for
// each of the two remaining input arguments.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(insn, inputs) \
  TEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, D, S, S, kInput64bitsAccDestination, \
        kInput32bits##inputs, kInput32bits##inputs); \
  }
3094
// Scalar long 3DIFF tests for both destinations: the S test from
// DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S followed by the D test from
// DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(insn, inputs) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(insn, inputs) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(insn, inputs)
3098
// Wide 3DIFF tests for `insn` and its `<insn>2` counterpart. In every test
// the first two forms match (8H, 4S or 2D) and the third is narrower: 8B, 4H
// and 2S for `insn`; 16B, 8H and 4S for `<insn>2`. The first and second input
// arguments use the destination width (kInput<N>bitsAccDestination and
// kInput<N>bits<inputs>); the third uses the half-width kInput<N/2>bits<inputs>.
#define DEFINE_TEST_NEON_3DIFF_WIDE(insn, inputs) \
  TEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 8H, 8H, 8B, kInput16bitsAccDestination, \
        kInput16bits##inputs, kInput8bits##inputs); \
  } \
  TEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 4S, 4S, 4H, kInput32bitsAccDestination, \
        kInput32bits##inputs, kInput16bits##inputs); \
  } \
  TEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 2D, 2D, 2S, kInput64bitsAccDestination, \
        kInput64bits##inputs, kInput32bits##inputs); \
  } \
  TEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 8H, 8H, 16B, kInput16bitsAccDestination, \
        kInput16bits##inputs, kInput8bits##inputs); \
  } \
  TEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 4S, 4S, 8H, kInput32bitsAccDestination, \
        kInput32bits##inputs, kInput16bits##inputs); \
  } \
  TEST(insn##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 2D, 2D, 4S, kInput64bitsAccDestination, \
        kInput64bits##inputs, kInput32bits##inputs); \
  }
3130
// Narrow 3DIFF tests for `insn` and its `<insn>2` counterpart. The first form
// is the narrow one (8B, 4H, 2S for `insn`; 16B, 8H, 4S for `<insn>2`) and
// the other two forms are 8H, 4S or 2D. The first input argument is the
// kInput<N>bitsAccDestination of the narrow width; the other two are both
// the double-width kInput<2N>bits<inputs>.
#define DEFINE_TEST_NEON_3DIFF_NARROW(insn, inputs) \
  TEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 8B, 8H, 8H, kInput8bitsAccDestination, \
        kInput16bits##inputs, kInput16bits##inputs); \
  } \
  TEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 4H, 4S, 4S, kInput16bitsAccDestination, \
        kInput32bits##inputs, kInput32bits##inputs); \
  } \
  TEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn, 2S, 2D, 2D, kInput32bitsAccDestination, \
        kInput64bits##inputs, kInput64bits##inputs); \
  } \
  TEST(insn##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 16B, 8H, 8H, kInput8bitsAccDestination, \
        kInput16bits##inputs, kInput16bits##inputs); \
  } \
  TEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 8H, 4S, 4S, kInput16bitsAccDestination, \
        kInput32bits##inputs, kInput32bits##inputs); \
  } \
  TEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        insn##2, 4S, 2D, 2D, kInput32bitsAccDestination, \
        kInput64bits##inputs, kInput64bits##inputs); \
  }
3162
// Pass-through to CALL_TEST_NEON_HELPER_2OpImm: the mnemonic, the two form
// arguments, the source inputs and the immediate inputs are forwarded
// unchanged inside a brace-enclosed block.
#define CALL_TEST_NEON_HELPER_2OPIMM(insn, vd, vn, src_inputs, imm_inputs) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm(insn, vd, vn, src_inputs, imm_inputs); \
  }
3171
// 2OPIMM tests for all seven vector forms of `insn` (8B, 16B, 4H, 8H, 2S, 4S,
// 2D), with the same form passed for both form arguments. Each test is named
// TEST(<insn>_<form>_2OPIMM) and pairs kInput<N>bits<inputs> with
// kInput<N>bitsImm<imm>, where N is 8, 16, 32 or 64 to match the form.
#define DEFINE_TEST_NEON_2OPIMM(insn, inputs, imm) \
  TEST(insn##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 8B, 8B, kInput8bits##inputs, kInput8bitsImm##imm); \
  } \
  TEST(insn##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 16B, 16B, kInput8bits##inputs, kInput8bitsImm##imm); \
  } \
  TEST(insn##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 4H, 4H, kInput16bits##inputs, kInput16bitsImm##imm); \
  } \
  TEST(insn##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 8H, 8H, kInput16bits##inputs, kInput16bitsImm##imm); \
  } \
  TEST(insn##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2S, 2S, kInput32bits##inputs, kInput32bitsImm##imm); \
  } \
  TEST(insn##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 4S, 4S, kInput32bits##inputs, kInput32bitsImm##imm); \
  } \
  TEST(insn##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2D, 2D, kInput64bits##inputs, kInput64bitsImm##imm); \
  }
3215
// 2OPIMM tests whose first form is a vector form (8B, 16B, 4H, 8H, 2S, 4S,
// 2D) and whose second form is the single-letter form of the same width (B,
// H, S or D). Test names and the kInput<N>bits<inputs> /
// kInput<N>bitsImm<imm> pairing follow the same scheme as
// DEFINE_TEST_NEON_2OPIMM.
#define DEFINE_TEST_NEON_2OPIMM_COPY(insn, inputs, imm) \
  TEST(insn##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 8B, B, kInput8bits##inputs, kInput8bitsImm##imm); \
  } \
  TEST(insn##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 16B, B, kInput8bits##inputs, kInput8bitsImm##imm); \
  } \
  TEST(insn##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 4H, H, kInput16bits##inputs, kInput16bitsImm##imm); \
  } \
  TEST(insn##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 8H, H, kInput16bits##inputs, kInput16bitsImm##imm); \
  } \
  TEST(insn##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2S, S, kInput32bits##inputs, kInput32bitsImm##imm); \
  } \
  TEST(insn##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 4S, S, kInput32bits##inputs, kInput32bitsImm##imm); \
  } \
  TEST(insn##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2D, D, kInput64bits##inputs, kInput64bitsImm##imm); \
  }
3259
// Narrowing 2OPIMM tests for `insn` and its `<insn>2` counterpart. The first
// form is the narrow one (8B, 4H, 2S for `insn`; 16B, 8H, 4S for `<insn>2`)
// and the second form is 8H, 4S or 2D. The source inputs are the wider
// kInput<2N>bits<inputs>; the immediates are kInput<N>bitsImm<imm>, matching
// the narrow width.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(insn, inputs, imm) \
  TEST(insn##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 8B, 8H, kInput16bits##inputs, kInput8bitsImm##imm); \
  } \
  TEST(insn##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 4H, 4S, kInput32bits##inputs, kInput16bitsImm##imm); \
  } \
  TEST(insn##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2S, 2D, kInput64bits##inputs, kInput32bitsImm##imm); \
  } \
  TEST(insn##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn##2, 16B, 8H, kInput16bits##inputs, kInput8bitsImm##imm); \
  } \
  TEST(insn##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn##2, 8H, 4S, kInput32bits##inputs, kInput16bitsImm##imm); \
  } \
  TEST(insn##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn##2, 4S, 2D, kInput64bits##inputs, kInput32bitsImm##imm); \
  }
3297
// Scalar narrowing 2OPIMM tests for `insn`. Emits TEST(<insn>_B_2OPIMM),
// TEST(<insn>_H_2OPIMM) and TEST(<insn>_S_2OPIMM) with form pairs (B, H),
// (H, S) and (S, D). The source inputs are kInput16bits<inputs>,
// kInput32bits<inputs> and kInput64bits<inputs>; the immediates are
// kInput8bitsImm<imm>, kInput16bitsImm<imm> and kInput32bitsImm<imm>.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(insn, inputs, imm) \
  TEST(insn##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, B, H, kInput16bits##inputs, kInput8bitsImm##imm); \
  } \
  TEST(insn##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, H, S, kInput32bits##inputs, kInput16bitsImm##imm); \
  } \
  TEST(insn##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, S, D, kInput64bits##inputs, kInput32bitsImm##imm); \
  }
3317
// Floating-point 2OPIMM tests for the 2S, 4S and 2D forms of `insn`, all of
// which take their immediates from kInputDoubleImm<imm>. The 4S test uses
// kInputFloat<inputs> and the 2D test uses kInputDouble<inputs>.
//
// NOTE(review): the 2S test always uses kInputFloatBasic and ignores
// `inputs`, unlike the 4S and 2D tests. This is kept unchanged because the
// expected results for existing tests were presumably recorded with it;
// confirm before switching it to kInputFloat##inputs.
//
// Every helper invocation is terminated with a semicolon so the three tests
// are written consistently.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(insn, inputs, imm) \
  TEST(insn##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2S, 2S, kInputFloatBasic, kInputDoubleImm##imm); \
  } \
  TEST(insn##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 4S, 4S, kInputFloat##inputs, kInputDoubleImm##imm); \
  } \
  TEST(insn##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2D, 2D, kInputDouble##inputs, kInputDoubleImm##imm); \
  }
3340
// Floating-point 2OPIMM tests for the 2S, 4S and 2D forms of `insn`. The 2S
// and 4S tests take their immediates from kInput32bitsImm<imm>; the 2D test
// takes them from kInput64bitsImm<imm>. The 4S test uses kInputFloat<inputs>
// and the 2D test uses kInputDouble<inputs>.
//
// NOTE(review): the 2S test always uses kInputFloatBasic and ignores
// `inputs`, unlike the 4S and 2D tests. This is kept unchanged because the
// expected results for existing tests were presumably recorded with it;
// confirm before switching it to kInputFloat##inputs.
//
// Every helper invocation is terminated with a semicolon, matching the other
// DEFINE_TEST_NEON_2OPIMM* macros.
#define DEFINE_TEST_NEON_2OPIMM_FP(insn, inputs, imm) \
  TEST(insn##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2S, 2S, kInputFloatBasic, kInput32bitsImm##imm); \
  } \
  TEST(insn##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 4S, 4S, kInputFloat##inputs, kInput32bitsImm##imm); \
  } \
  TEST(insn##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        insn, 2D, 2D, kInputDouble##inputs, kInput64bitsImm##imm); \
  }
3363
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_FP: defines
// <mnemonic>_S_2OPIMM (32-bit immediates) and <mnemonic>_D_2OPIMM (64-bit
// immediates, kInputDouble<input> data).
// NOTE(review): the S variant always reads kInputFloatBasic and ignores
// `input`. That matches the original definition; confirm against the recorded
// traces before changing it.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloatBasic, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
                                 kInput64bitsImm##input_imm); \
  }
3379
// Defines the 2S, 4S and 2D tests for an integer instruction taking an
// immediate operand. S-lane variants read kInput32bits<input> with
// kInput32bitsImm<input_imm>; the D-lane variant reads kInput64bits<input>
// with kInput64bitsImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3399
// Defines the single test <mnemonic>_D_2OPIMM: a D-register scalar operation
// fed from kInput64bits<input> and kInput64bitsImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3407
// Defines <mnemonic>_S_2OPIMM (32-bit inputs and immediates) and then reuses
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D to add the D-register test.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
3416
// Defines the single test <mnemonic>_D_2OPIMM for a floating-point scalar
// operation: data from kInputDouble<input>, immediates from
// kInputDoubleImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }
3424
// Defines <mnemonic>_S_2OPIMM (kInputFloat<input> data) and then reuses
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D to add the D-register test. Both
// variants take their immediates from kInputDoubleImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
3433
// Defines scalar tests for all four lane sizes: <mnemonic>_B_2OPIMM and
// <mnemonic>_H_2OPIMM here (8-bit and 16-bit inputs and immediates), with the
// S and D tests supplied by DEFINE_TEST_NEON_2OPIMM_SCALAR_SD.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
3448
// Defines six tests for an instruction whose two vector forms differ in lane
// width. For <mnemonic> the form pairs are (8H, 8B), (4S, 4H) and (2D, 2S);
// for <mnemonic>2 they are (8H, 16B), (4S, 8H) and (2D, 4S). In every case the
// input and immediate lists are sized to the second, narrower form
// (presumably the source operand -- the helper is defined outside this block).
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3486
// Upper-case spelling of CALL_TEST_NEON_HELPER_ByElement: forwards all eight
// arguments unchanged and wraps the call in its own brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, \
                                    input_d, input_n, input_m, indices); \
  }
3496
// Defines the by-element tests for the same-width integer forms:
// 4H/4H/H and 8H/8H/H (16-bit inputs, kInputHIndices) plus 2S/2S/S and
// 4S/4S/S (32-bit inputs, kInputSIndices). input_d, input_n and input_m name
// the input sets for the vd, vn and vm operands respectively.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4H, 4H, H, kInput16bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 8H, 8H, H, kInput16bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2S, S, kInput32bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4S, S, kInput32bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  }
3530
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT: defines <mnemonic>_H_H_H
// (16-bit inputs, kInputHIndices) and <mnemonic>_S_S_S (32-bit inputs,
// kInputSIndices).
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, \
                                          input_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, H, H, H, kInput16bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, S, S, kInput32bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  }
3549
// Defines the floating-point by-element tests: 2S/2S/S and 4S/4S/S read the
// kInputFloat<...> sets with kInputSIndices, and 2D/2D/D reads the
// kInputDouble<...> sets with kInputDIndices.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2S, S, kInputFloat##input_d, kInputFloat##input_n, \
        kInputFloat##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4S, S, kInputFloat##input_d, kInputFloat##input_n, \
        kInputFloat##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, \
        kInputDouble##input_m, kInputDIndices); \
  }
3575
// Scalar counterpart of DEFINE_TEST_NEON_FP_BYELEMENT: defines
// <mnemonic>_S_S_S (kInputFloat<...> sets, kInputSIndices) and
// <mnemonic>_D_D_D (kInputDouble<...> sets, kInputDIndices).
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, S, S, kInputFloat##inp_d, kInputFloat##inp_n, \
        kInputFloat##inp_m, kInputSIndices); \
  } \
  TEST(mnemonic##_D_D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, D, D, D, kInputDouble##inp_d, kInputDouble##inp_n, \
        kInputDouble##inp_m, kInputDIndices); \
  }
3593
3594
// Defines the by-element tests whose vd form is wider than vn/vm:
//   <mnemonic>  4S/4H/H and <mnemonic>2  4S/8H/H  (32-bit vd inputs, 16-bit
//               vn/vm inputs, kInputHIndices)
//   <mnemonic>  2D/2S/S and <mnemonic>2  2D/4S/S  (64-bit vd inputs, 32-bit
//               vn/vm inputs, kInputSIndices)
// The tests are emitted in the order 4S/4H, 4S/8H, 2D/2S, 2D/4S.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##2_4S_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_2D_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  } \
  TEST(mnemonic##2_2D_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  }
3628
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT_DIFF: defines
// <mnemonic>_S_H_H (32-bit vd inputs, 16-bit vn/vm inputs, kInputHIndices)
// and <mnemonic>_D_S_S (64-bit vd inputs, 32-bit vn/vm inputs,
// kInputSIndices).
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, \
                                               input_m) \
  TEST(mnemonic##_S_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, H, H, kInput32bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  TEST(mnemonic##_D_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, D, S, S, kInput64bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  }
3647
3648
// Brace-wrapped call to CALL_TEST_NEON_HELPER_OpImmOpImm for an instruction
// with two register operands that each carry an immediate. Passes the
// MacroAssembler member-function pointer for `mnemonic`, the mnemonic itself,
// and `variant` twice (presumably as both the vd and vn form -- the helper is
// defined outside this block), followed by each operand's input list and
// immediate list.
#define CALL_TEST_NEON_HELPER_2OP2IMM( \
    mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
  { \
    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, \
                                     variant, variant, input_d, input_imm1, \
                                     input_n, input_imm2); \
  }
3661
// Defines four tests -- <mnemonic>_B, _H, _S and _D -- using the full-width
// arrangements 16B, 8H, 4S and 2D. Each test pulls both operands' input lists
// (input_d, input_n) and both immediate lists (input_imm1, input_imm2) from
// the constants matching its lane width (8, 16, 32 or 64 bits).
#define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, \
                                 input_imm2) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, \
        kInput8bits##input_n, kInput8bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, \
        kInput16bits##input_n, kInput16bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, \
        kInput32bits##input_n, kInput32bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, \
        kInput64bits##input_n, kInput64bitsImm##input_imm2); \
  }
3697
3698
3699// Advanced SIMD copy.
// `ins` goes through DEFINE_TEST_NEON_2OP2IMM, which yields ins_B, ins_H,
// ins_S and ins_D; both operands use the Basic input set and take their lane
// immediates from the LaneCountFromZero immediate lists.
// `dup` goes through DEFINE_TEST_NEON_2OPIMM_COPY, whose definition is earlier
// in the file.
3700DEFINE_TEST_NEON_2OP2IMM(ins,
3701                         Basic, LaneCountFromZero,
3702                         Basic, LaneCountFromZero)
3703DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
3704
3705
3706// Advanced SIMD scalar copy.
// DEFINE_TEST_NEON_2OPIMM_SCALAR yields dup_B_2OPIMM, dup_H_2OPIMM,
// dup_S_2OPIMM and dup_D_2OPIMM, each using the Basic inputs and the
// LaneCountFromZero immediates of the matching width.
3707DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
3708
3709
3710// Advanced SIMD three same.
// One invocation per instruction: the first argument is the mnemonic and the
// second names the input set (Basic throughout). The macro suffix (_NO2D,
// _HS, _FP, _8B_16B) presumably selects which vector arrangements get a test;
// the DEFINE_TEST_NEON_3SAME* definitions are earlier in the file -- confirm
// there. `and` is an alternative operator token in C++, which is presumably
// why that mnemonic is spelled `and_`.
3711DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
3712DEFINE_TEST_NEON_3SAME(sqadd, Basic)
3713DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
3714DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
3715DEFINE_TEST_NEON_3SAME(sqsub, Basic)
3716DEFINE_TEST_NEON_3SAME(cmgt, Basic)
3717DEFINE_TEST_NEON_3SAME(cmge, Basic)
3718DEFINE_TEST_NEON_3SAME(sshl, Basic)
3719DEFINE_TEST_NEON_3SAME(sqshl, Basic)
3720DEFINE_TEST_NEON_3SAME(srshl, Basic)
3721DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
3722DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
3723DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
3724DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
3725DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
3726DEFINE_TEST_NEON_3SAME(add, Basic)
3727DEFINE_TEST_NEON_3SAME(cmtst, Basic)
3728DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
3729DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
3730DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
3731DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
3732DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
3733DEFINE_TEST_NEON_3SAME(addp, Basic)
3734DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
3735DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
3736DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
3737DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
3738DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
3739DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
3740DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
3741DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
3742DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
3743DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
3744DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
3745DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
3746DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
3747DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
3748DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
3749DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
3750DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
3751DEFINE_TEST_NEON_3SAME(uqadd, Basic)
3752DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
3753DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
3754DEFINE_TEST_NEON_3SAME(uqsub, Basic)
3755DEFINE_TEST_NEON_3SAME(cmhi, Basic)
3756DEFINE_TEST_NEON_3SAME(cmhs, Basic)
3757DEFINE_TEST_NEON_3SAME(ushl, Basic)
3758DEFINE_TEST_NEON_3SAME(uqshl, Basic)
3759DEFINE_TEST_NEON_3SAME(urshl, Basic)
3760DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
3761DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
3762DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
3763DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
3764DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
3765DEFINE_TEST_NEON_3SAME(sub, Basic)
3766DEFINE_TEST_NEON_3SAME(cmeq, Basic)
3767DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
3768DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
3769DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
3770DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
3771DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
3772DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
3773DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
3774DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
3775DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
3776DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
3777DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
3778DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
3779DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
3780DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
3781DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
3782DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
3783DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
3784DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
3785DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
3786DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
3787DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
3788
3789
3790// Advanced SIMD scalar three same.
// One invocation per instruction, all using the Basic input set. The macro
// suffix (_D, _HS, or none) presumably selects which scalar register sizes
// get a test, and _FP_SCALAR the floating-point forms; the
// DEFINE_TEST_NEON_3SAME*SCALAR* definitions are earlier in the file --
// confirm there.
3791DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
3792DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
3793DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
3794DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
3795DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
3796DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
3797DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
3798DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
3799DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
3800DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
3801DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
3802DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
3803DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
3804DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
3805DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
3806DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
3807DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
3808DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
3809DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
3810DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
3811DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
3812DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
3813DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
3814DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
3815DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
3816DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
3817DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
3818DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
3819DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
3820DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
3821DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
3822
3823
3824// Advanced SIMD three different.
// Each entry defines the test(s) for one mnemonic; the macro used
// (_LONG, _WIDE, _NARROW, _LONG_SD, _LONG_8H) differs per instruction.
// NOTE(review): those suffixes presumably describe the operand-size
// relationship and any restriction on the tested arrangements - confirm
// against the DEFINE_TEST_NEON_3DIFF_* definitions.
3825DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
3826DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
3827DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
3828DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
3829DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
3830DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
3831DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
3832DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
3833DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
3834DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
3835DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
3836DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
3837DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
3838DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
3839DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
3840DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
3841DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
3842DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
3843DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
3844DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
3845DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
3846DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
3847DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
3848DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
3849DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
3850DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
3851
3852
3853// Advanced SIMD scalar three different.
// Only sqdmlal, sqdmlsl and sqdmull are listed in this group.
// NOTE(review): _SD presumably selects the S and D destination sizes -
// confirm against DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD.
3854DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
3855DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
3856DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
3857
3858
3859// Advanced SIMD scalar pairwise.
// Scalar ADDP is written out by hand instead of through a DEFINE_TEST_NEON_*
// macro: it invokes the 2DIFF helper once, with formats D and 2D and the
// kInput64bitsBasic inputs.
// NOTE(review): presumably D is the destination and 2D the source format -
// confirm against CALL_TEST_NEON_HELPER_2DIFF.
3860TEST(addp_SCALAR) {
3861  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
3862}
// Floating-point scalar pairwise tests, generated by macro.
// NOTE(review): _SD presumably means both the S and D destination sizes are
// tested - confirm against DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD.
3863DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
3864DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
3865DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
3866DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
3867DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
3868
3869
3870// Advanced SIMD shift by immediate.
// The third argument names the set of immediates to test. In this list the
// right shifts use TypeWidth, the left shifts use TypeWidthFromZero and the
// fixed-point conversions use TypeWidthFromZeroToWidth.
// NOTE(review): the exact immediate ranges behind those names are defined
// elsewhere - confirm there before adding new entries.
3871DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
3872DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
3873DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
3874DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
3875DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
3876DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
3877DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
3878DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
3879DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
3880DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
3881DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
3882DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \
3883                           TypeWidthFromZeroToWidth)
3884DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3885DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
3886DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
3887DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
3888DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
3889DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
3890DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
3891DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
3892DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
3893DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
3894DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
3895DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
3896DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
3897DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
3898DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \
3899                           TypeWidthFromZeroToWidth)
3900DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3901
3902
3903// Advanced SIMD scalar shift by immediate.
// The third argument names the set of immediates to test. In this list the
// right shifts use TypeWidth, the left shifts use TypeWidthFromZero and the
// fixed-point conversions use TypeWidthFromZeroToWidth.
// NOTE(review): _SCALAR_D presumably restricts a test to the D register size
// - confirm against the macro definitions.
3904DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
3905DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
3906DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
3907DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
3908DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
3909DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
3910DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
3911DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
3912DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \
3913                                  TypeWidthFromZeroToWidth)
3914DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
3915DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
3916DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
3917DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
3918DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
3919DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
3920DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
3921DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
3922DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
3923DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
3924DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
3925DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
3926DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
3927DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \
3928                                  TypeWidthFromZeroToWidth)
3929DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
3930
3931
3932// Advanced SIMD two-register miscellaneous.
// Compare-against-zero forms (cmgt, fcmgt, ...) go through the 2OPIMM macros
// with the "Zero" immediate set, and shll uses its own "SHLL" immediate set.
// Integer<->FP conversions that are not listed here are covered by the
// fixed-point tests with fbits 0, as the inline comments below note.
// NOTE(review): suffixes such as _NO2D, _8B_16B, _2S_4S and _BH presumably
// restrict the vector arrangements tested - confirm against the macro
// definitions.
3933DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
3934DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
3935DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
3936DEFINE_TEST_NEON_2SAME(suqadd, Basic)
3937DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
3938DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
3939DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
3940DEFINE_TEST_NEON_2SAME(sqabs, Basic)
3941DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
3942DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
3943DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
3944DEFINE_TEST_NEON_2SAME(abs, Basic)
3945DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
3946DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
3947DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
3948DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
3949DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
3950DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
3951DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
3952DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
3953DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
3954// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
3955DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
3956DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
3957DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
3958DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
3959DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
3960DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
3961DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
3962// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
3963DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
3964DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
3965DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
3966DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
3967DEFINE_TEST_NEON_2SAME(usqadd, Basic)
3968DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
3969DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
3970DEFINE_TEST_NEON_2SAME(sqneg, Basic)
3971DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
3972DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
3973DEFINE_TEST_NEON_2SAME(neg, Basic)
3974DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
3975DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
3976DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
3977DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
3978DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
3979DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
3980DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
3981DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
3982DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
3983// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
3984DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
3985DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
3986DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
3987DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
3988DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
3989DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
3990DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
3991// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
3992DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
3993DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
3994DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
3995
3996
3997// Advanced SIMD scalar two-register miscellaneous.
// Compare-against-zero forms use the scalar 2OPIMM macros with the "Zero"
// immediate set. Integer<->FP conversions that are not listed here are
// covered by the fixed-point tests with fbits 0, as the inline comments note.
// NOTE(review): _SCALAR_D presumably restricts a test to the D register size
// - confirm against the macro definitions.
3998DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
3999DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
4000DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
4001DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
4002DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
4003DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
4004DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
4005DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
4006DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
4007DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
4008// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4009DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
4010DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
4011DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
4012DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
4013// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4014DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
4015DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
4016DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
4017DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
4018DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
4019DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
4020DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
4021DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
4022DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// Scalar FCVTXN is written out by hand instead of through a
// DEFINE_TEST_NEON_* macro: it invokes the 2DIFF helper once, with formats S
// and D and the kInputDoubleConversions inputs.
// NOTE(review): presumably S is the destination and D the source format -
// confirm against CALL_TEST_NEON_HELPER_2DIFF.
4023TEST(fcvtxn_SCALAR) {
4024  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
4025}
// Remaining macro-generated scalar two-register miscellaneous tests.
4026DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
4027DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
4028DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
4029// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4030DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
4031DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
4032DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
4033// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4034DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
4035
4036
4037// Advanced SIMD across lanes.
// saddlv/uaddlv use the _LONG macro, the other integer reductions use the
// plain macro and the floating-point reductions use _FP.
// NOTE(review): _LONG presumably means the result is wider than the source
// lanes - confirm against DEFINE_TEST_NEON_ACROSS_LONG.
4038DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
4039DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
4040DEFINE_TEST_NEON_ACROSS(sminv, Basic)
4041DEFINE_TEST_NEON_ACROSS(addv, Basic)
4042DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
4043DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
4044DEFINE_TEST_NEON_ACROSS(uminv, Basic)
4045DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
4046DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
4047DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
4048DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4049
4050
4051// Advanced SIMD permute.
// The permute instructions reuse DEFINE_TEST_NEON_3SAME rather than a
// dedicated macro.
4052DEFINE_TEST_NEON_3SAME(uzp1, Basic)
4053DEFINE_TEST_NEON_3SAME(trn1, Basic)
4054DEFINE_TEST_NEON_3SAME(zip1, Basic)
4055DEFINE_TEST_NEON_3SAME(uzp2, Basic)
4056DEFINE_TEST_NEON_3SAME(trn2, Basic)
4057DEFINE_TEST_NEON_3SAME(zip2, Basic)
4058
4059
4060// Advanced SIMD vector x indexed element.
// Each entry passes three input-set names after the mnemonic.
// NOTE(review): presumably one set per operand (destination, vector source,
// indexed-element source) - confirm against the DEFINE_TEST_NEON_*BYELEMENT*
// macro definitions.
4061DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
4062DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
4063DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
4064DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
4065DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
4066DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
4067DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
4068DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
4069DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
4070DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
4071DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
4072DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
4073DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
4074DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
4075DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
4076DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
4077DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
4078DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4079
4080
4081// Advanced SIMD scalar x indexed element.
// Each entry passes three input-set names after the mnemonic.
// NOTE(review): presumably one set per operand (destination, scalar source,
// indexed-element source) - confirm against the
// DEFINE_TEST_NEON_*BYELEMENT*_SCALAR macro definitions.
4082DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
4083DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
4084DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
4085DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
4086DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
4087DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
4088DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
4089DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
4090DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
4091
4092}  // namespace vixl
4093