1// Copyright 2015, ARM Limited
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may be
13//     used to endorse or promote products derived from this software without
14//     specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <float.h>
31#include <cmath>
32
33#include "test-runner.h"
34#include "test-utils-a64.h"
35#include "vixl/a64/macro-assembler-a64.h"
36#include "vixl/a64/simulator-a64.h"
37#include "vixl/a64/debugger-a64.h"
38#include "vixl/a64/disasm-a64.h"
39#include "vixl/a64/cpu-a64.h"
40
41namespace vixl {
42
43// Test infrastructure.
44//
45// Tests are functions which accept no parameters and have no return values.
46// The testing code should not perform an explicit return once completed. For
47// example to test the mov immediate instruction a very simple test would be:
48//
49//   TEST(mov_x0_one) {
50//     SETUP();
51//
52//     START();
53//     __ mov(x0, Operand(1));
54//     END();
55//
56//     RUN();
57//
58//     ASSERT_EQUAL_64(1, x0);
59//
60//     TEARDOWN();
61//   }
62//
63// Within a START ... END block all registers but sp can be modified. sp has to
64// be explicitly saved/restored. The END() macro replaces the function return
65// so it may appear multiple times in a test if the test has multiple exit
66// points.
67//
68// Once the test has been run all integer and floating point registers as well
69// as flags are accessible through a RegisterDump instance, see
70// utils-a64.cc for more info on RegisterDump.
71//
72// We provide some helper assert to handle common cases:
73//
74//   ASSERT_EQUAL_32(int32_t, int_32t)
75//   ASSERT_EQUAL_FP32(float, float)
76//   ASSERT_EQUAL_32(int32_t, W register)
77//   ASSERT_EQUAL_FP32(float, S register)
78//   ASSERT_EQUAL_64(int64_t, int_64t)
79//   ASSERT_EQUAL_FP64(double, double)
80//   ASSERT_EQUAL_64(int64_t, X register)
81//   ASSERT_EQUAL_64(X register, X register)
82//   ASSERT_EQUAL_FP64(double, D register)
83//
84// e.g. ASSERT_EQUAL_64(0.5, d30);
85//
86// If more advanced computation is required before the assert then access the
87// RegisterDump named core directly:
88//
89//   ASSERT_EQUAL_64(0x1234, core->reg_x0() & 0xffff);
90
91
92#define __ masm.
93#define TEST(name)  TEST_(ASM_##name)
94
95#define BUF_SIZE (4096)
96
97#ifdef VIXL_INCLUDE_SIMULATOR
98// Run tests with the simulator.
99
100#define SETUP()                                                                \
101  MacroAssembler masm(BUF_SIZE);                                               \
102  SETUP_COMMON()
103
104#define SETUP_CUSTOM(size, pic)                                                \
105  byte* buf = new byte[size + BUF_SIZE];                                       \
106  MacroAssembler masm(buf, size + BUF_SIZE, pic);                              \
107  SETUP_COMMON()
108
109#define SETUP_COMMON()                                                         \
110  masm.SetAllowSimulatorInstructions(true);                                    \
111  Decoder decoder;                                                             \
112  Simulator* simulator = Test::run_debugger() ? new Debugger(&decoder)         \
113                                              : new Simulator(&decoder);       \
114  simulator->set_coloured_trace(Test::coloured_trace());                       \
115  simulator->set_instruction_stats(Test::instruction_stats());                 \
116  RegisterDump core
117
118// This is a convenience macro to avoid creating a scope for every assembler
119// function called. It will still assert the buffer hasn't been exceeded.
120#define ALLOW_ASM()                                                            \
121  CodeBufferCheckScope guard(&masm, masm.BufferCapacity())
122
123#define START()                                                                \
124  masm.Reset();                                                                \
125  simulator->ResetState();                                                     \
126  __ PushCalleeSavedRegisters();                                               \
127  if (Test::trace_reg()) {                                                     \
128    __ Trace(LOG_STATE, TRACE_ENABLE);                                         \
129  }                                                                            \
130  if (Test::trace_write()) {                                                   \
131    __ Trace(LOG_WRITE, TRACE_ENABLE);                                         \
132  }                                                                            \
133  if (Test::trace_sim()) {                                                     \
134    __ Trace(LOG_DISASM, TRACE_ENABLE);                                        \
135  }                                                                            \
136  if (Test::instruction_stats()) {                                             \
137    __ EnableInstrumentation();                                                \
138  }
139
140#define END()                                                                  \
141  if (Test::instruction_stats()) {                                             \
142    __ DisableInstrumentation();                                               \
143  }                                                                            \
144  __ Trace(LOG_ALL, TRACE_DISABLE);                                            \
145  core.Dump(&masm);                                                            \
146  __ PopCalleeSavedRegisters();                                                \
147  __ Ret();                                                                    \
148  masm.FinalizeCode()
149
150#define RUN()                                                                  \
151  simulator->RunFrom(masm.GetStartAddress<Instruction*>())
152
153#define TEARDOWN() TEARDOWN_COMMON()
154
155#define TEARDOWN_CUSTOM()                                                      \
156  delete[] buf;                                                                \
157  TEARDOWN_COMMON()
158
159#define TEARDOWN_COMMON()                                                      \
160  delete simulator;
161
162#else  // ifdef VIXL_INCLUDE_SIMULATOR.
163// Run the test on real hardware or models.
164#define SETUP()                                                                \
165  MacroAssembler masm(BUF_SIZE);                                               \
166  SETUP_COMMON()
167
168#define SETUP_CUSTOM(size, pic)                                                \
169  byte* buf = new byte[size + BUF_SIZE];                                       \
170  MacroAssembler masm(buf, size + BUF_SIZE, pic);                              \
171  SETUP_COMMON()
172
173#define SETUP_COMMON()                                                         \
174  masm.SetAllowSimulatorInstructions(false);                                   \
175  RegisterDump core;                                                           \
176  CPU::SetUp()
177
178// This is a convenience macro to avoid creating a scope for every assembler
179// function called. It will still assert the buffer hasn't been exceeded.
180#define ALLOW_ASM()                                                            \
181  CodeBufferCheckScope guard(&masm, masm.BufferCapacity())
182
183#define START()                                                                \
184  masm.Reset();                                                                \
185  __ PushCalleeSavedRegisters()
186
187#define END()                                                                  \
188  core.Dump(&masm);                                                            \
189  __ PopCalleeSavedRegisters();                                                \
190  __ Ret();                                                                    \
191  masm.FinalizeCode()
192
193#define RUN()                                                                  \
194  {                                                                            \
195    byte* buffer_start = masm.GetStartAddress<byte*>();                        \
196    size_t buffer_length = masm.CursorOffset();                                \
197    void (*test_function)(void);                                               \
198                                                                               \
199    CPU::EnsureIAndDCacheCoherency(buffer_start, buffer_length);               \
200    VIXL_STATIC_ASSERT(sizeof(buffer_start) == sizeof(test_function));         \
201    memcpy(&test_function, &buffer_start, sizeof(buffer_start));               \
202    test_function();                                                           \
203  }
204
205#define TEARDOWN()
206
207#define TEARDOWN_CUSTOM()                                                      \
208  delete[] buf;                                                                \
209
210#endif  // ifdef VIXL_INCLUDE_SIMULATOR.
211
212#define ASSERT_EQUAL_NZCV(expected)                                            \
213  assert(EqualNzcv(expected, core.flags_nzcv()))
214
215#define ASSERT_EQUAL_REGISTERS(expected)                                       \
216  assert(EqualRegisters(&expected, &core))
217
218#define ASSERT_EQUAL_32(expected, result)                                      \
219  assert(Equal32(static_cast<uint32_t>(expected), &core, result))
220
221#define ASSERT_EQUAL_FP32(expected, result)                                    \
222  assert(EqualFP32(expected, &core, result))
223
224#define ASSERT_EQUAL_64(expected, result)                                      \
225  assert(Equal64(expected, &core, result))
226
227#define ASSERT_EQUAL_FP64(expected, result)                                    \
228  assert(EqualFP64(expected, &core, result))
229
230#define ASSERT_EQUAL_128(expected_h, expected_l, result)                       \
231  assert(Equal128(expected_h, expected_l, &core, result))
232
233#define ASSERT_LITERAL_POOL_SIZE(expected)                                     \
234  assert((expected + kInstructionSize) == (masm.LiteralPoolSize()))
235
236
237TEST(stack_ops) {
238  SETUP();
239
240  START();
241  // save sp.
242  __ Mov(x29, sp);
243
244  // Set the sp to a known value.
245  __ Mov(sp, 0x1004);
246  __ Mov(x0, sp);
247
248  // Add immediate to the sp, and move the result to a normal register.
249  __ Add(sp, sp, 0x50);
250  __ Mov(x1, sp);
251
252  // Add extended to the sp, and move the result to a normal register.
253  __ Mov(x17, 0xfff);
254  __ Add(sp, sp, Operand(x17, SXTB));
255  __ Mov(x2, sp);
256
257  // Create an sp using a logical instruction, and move to normal register.
258  __ Orr(sp, xzr, 0x1fff);
259  __ Mov(x3, sp);
260
261  // Write wsp using a logical instruction.
262  __ Orr(wsp, wzr, 0xfffffff8);
263  __ Mov(x4, sp);
264
265  // Write sp, and read back wsp.
266  __ Orr(sp, xzr, 0xfffffff8);
267  __ Mov(w5, wsp);
268
269  //  restore sp.
270  __ Mov(sp, x29);
271  END();
272
273  RUN();
274
275  ASSERT_EQUAL_64(0x1004, x0);
276  ASSERT_EQUAL_64(0x1054, x1);
277  ASSERT_EQUAL_64(0x1053, x2);
278  ASSERT_EQUAL_64(0x1fff, x3);
279  ASSERT_EQUAL_64(0xfffffff8, x4);
280  ASSERT_EQUAL_64(0xfffffff8, x5);
281
282  TEARDOWN();
283}
284
285
286TEST(mvn) {
287  SETUP();
288
289  START();
290  __ Mvn(w0, 0xfff);
291  __ Mvn(x1, 0xfff);
292  __ Mvn(w2, Operand(w0, LSL, 1));
293  __ Mvn(x3, Operand(x1, LSL, 2));
294  __ Mvn(w4, Operand(w0, LSR, 3));
295  __ Mvn(x5, Operand(x1, LSR, 4));
296  __ Mvn(w6, Operand(w0, ASR, 11));
297  __ Mvn(x7, Operand(x1, ASR, 12));
298  __ Mvn(w8, Operand(w0, ROR, 13));
299  __ Mvn(x9, Operand(x1, ROR, 14));
300  __ Mvn(w10, Operand(w2, UXTB));
301  __ Mvn(x11, Operand(x2, SXTB, 1));
302  __ Mvn(w12, Operand(w2, UXTH, 2));
303  __ Mvn(x13, Operand(x2, SXTH, 3));
304  __ Mvn(x14, Operand(w2, UXTW, 4));
305  __ Mvn(x15, Operand(w2, SXTW, 4));
306  END();
307
308  RUN();
309
310  ASSERT_EQUAL_64(0xfffff000, x0);
311  ASSERT_EQUAL_64(0xfffffffffffff000, x1);
312  ASSERT_EQUAL_64(0x00001fff, x2);
313  ASSERT_EQUAL_64(0x0000000000003fff, x3);
314  ASSERT_EQUAL_64(0xe00001ff, x4);
315  ASSERT_EQUAL_64(0xf0000000000000ff, x5);
316  ASSERT_EQUAL_64(0x00000001, x6);
317  ASSERT_EQUAL_64(0x0000000000000000, x7);
318  ASSERT_EQUAL_64(0x7ff80000, x8);
319  ASSERT_EQUAL_64(0x3ffc000000000000, x9);
320  ASSERT_EQUAL_64(0xffffff00, x10);
321  ASSERT_EQUAL_64(0x0000000000000001, x11);
322  ASSERT_EQUAL_64(0xffff8003, x12);
323  ASSERT_EQUAL_64(0xffffffffffff0007, x13);
324  ASSERT_EQUAL_64(0xfffffffffffe000f, x14);
325  ASSERT_EQUAL_64(0xfffffffffffe000f, x15);
326
327  TEARDOWN();
328}
329
330
331TEST(mov_imm_w) {
332  SETUP();
333
334  START();
335  __ Mov(w0, 0xffffffff);
336  __ Mov(w1, 0xffff1234);
337  __ Mov(w2, 0x1234ffff);
338  __ Mov(w3, 0x00000000);
339  __ Mov(w4, 0x00001234);
340  __ Mov(w5, 0x12340000);
341  __ Mov(w6, 0x12345678);
342  __ Mov(w7, (int32_t)0x80000000);
343  __ Mov(w8, (int32_t)0xffff0000);
344  __ Mov(w9, kWMinInt);
345  END();
346
347  RUN();
348
349  ASSERT_EQUAL_64(0xffffffff, x0);
350  ASSERT_EQUAL_64(0xffff1234, x1);
351  ASSERT_EQUAL_64(0x1234ffff, x2);
352  ASSERT_EQUAL_64(0x00000000, x3);
353  ASSERT_EQUAL_64(0x00001234, x4);
354  ASSERT_EQUAL_64(0x12340000, x5);
355  ASSERT_EQUAL_64(0x12345678, x6);
356  ASSERT_EQUAL_64(0x80000000, x7);
357  ASSERT_EQUAL_64(0xffff0000, x8);
358  ASSERT_EQUAL_32(kWMinInt, w9);
359
360  TEARDOWN();
361}
362
363
364TEST(mov_imm_x) {
365  SETUP();
366
367  START();
368  __ Mov(x0, 0xffffffffffffffff);
369  __ Mov(x1, 0xffffffffffff1234);
370  __ Mov(x2, 0xffffffff12345678);
371  __ Mov(x3, 0xffff1234ffff5678);
372  __ Mov(x4, 0x1234ffffffff5678);
373  __ Mov(x5, 0x1234ffff5678ffff);
374  __ Mov(x6, 0x12345678ffffffff);
375  __ Mov(x7, 0x1234ffffffffffff);
376  __ Mov(x8, 0x123456789abcffff);
377  __ Mov(x9, 0x12345678ffff9abc);
378  __ Mov(x10, 0x1234ffff56789abc);
379  __ Mov(x11, 0xffff123456789abc);
380  __ Mov(x12, 0x0000000000000000);
381  __ Mov(x13, 0x0000000000001234);
382  __ Mov(x14, 0x0000000012345678);
383  __ Mov(x15, 0x0000123400005678);
384  __ Mov(x18, 0x1234000000005678);
385  __ Mov(x19, 0x1234000056780000);
386  __ Mov(x20, 0x1234567800000000);
387  __ Mov(x21, 0x1234000000000000);
388  __ Mov(x22, 0x123456789abc0000);
389  __ Mov(x23, 0x1234567800009abc);
390  __ Mov(x24, 0x1234000056789abc);
391  __ Mov(x25, 0x0000123456789abc);
392  __ Mov(x26, 0x123456789abcdef0);
393  __ Mov(x27, 0xffff000000000001);
394  __ Mov(x28, 0x8000ffff00000000);
395  END();
396
397  RUN();
398
399  ASSERT_EQUAL_64(0xffffffffffff1234, x1);
400  ASSERT_EQUAL_64(0xffffffff12345678, x2);
401  ASSERT_EQUAL_64(0xffff1234ffff5678, x3);
402  ASSERT_EQUAL_64(0x1234ffffffff5678, x4);
403  ASSERT_EQUAL_64(0x1234ffff5678ffff, x5);
404  ASSERT_EQUAL_64(0x12345678ffffffff, x6);
405  ASSERT_EQUAL_64(0x1234ffffffffffff, x7);
406  ASSERT_EQUAL_64(0x123456789abcffff, x8);
407  ASSERT_EQUAL_64(0x12345678ffff9abc, x9);
408  ASSERT_EQUAL_64(0x1234ffff56789abc, x10);
409  ASSERT_EQUAL_64(0xffff123456789abc, x11);
410  ASSERT_EQUAL_64(0x0000000000000000, x12);
411  ASSERT_EQUAL_64(0x0000000000001234, x13);
412  ASSERT_EQUAL_64(0x0000000012345678, x14);
413  ASSERT_EQUAL_64(0x0000123400005678, x15);
414  ASSERT_EQUAL_64(0x1234000000005678, x18);
415  ASSERT_EQUAL_64(0x1234000056780000, x19);
416  ASSERT_EQUAL_64(0x1234567800000000, x20);
417  ASSERT_EQUAL_64(0x1234000000000000, x21);
418  ASSERT_EQUAL_64(0x123456789abc0000, x22);
419  ASSERT_EQUAL_64(0x1234567800009abc, x23);
420  ASSERT_EQUAL_64(0x1234000056789abc, x24);
421  ASSERT_EQUAL_64(0x0000123456789abc, x25);
422  ASSERT_EQUAL_64(0x123456789abcdef0, x26);
423  ASSERT_EQUAL_64(0xffff000000000001, x27);
424  ASSERT_EQUAL_64(0x8000ffff00000000, x28);
425
426
427  TEARDOWN();
428}
429
430
431TEST(mov) {
432  SETUP();
433  ALLOW_ASM();
434
435  START();
436  __ Mov(x0, 0xffffffffffffffff);
437  __ Mov(x1, 0xffffffffffffffff);
438  __ Mov(x2, 0xffffffffffffffff);
439  __ Mov(x3, 0xffffffffffffffff);
440
441  __ Mov(x0, 0x0123456789abcdef);
442
443  __ movz(x1, UINT64_C(0xabcd) << 16);
444  __ movk(x2, UINT64_C(0xabcd) << 32);
445  __ movn(x3, UINT64_C(0xabcd) << 48);
446
447  __ Mov(x4, 0x0123456789abcdef);
448  __ Mov(x5, x4);
449
450  __ Mov(w6, -1);
451
452  // Test that moves back to the same register have the desired effect. This
453  // is a no-op for X registers, and a truncation for W registers.
454  __ Mov(x7, 0x0123456789abcdef);
455  __ Mov(x7, x7);
456  __ Mov(x8, 0x0123456789abcdef);
457  __ Mov(w8, w8);
458  __ Mov(x9, 0x0123456789abcdef);
459  __ Mov(x9, Operand(x9));
460  __ Mov(x10, 0x0123456789abcdef);
461  __ Mov(w10, Operand(w10));
462
463  __ Mov(w11, 0xfff);
464  __ Mov(x12, 0xfff);
465  __ Mov(w13, Operand(w11, LSL, 1));
466  __ Mov(x14, Operand(x12, LSL, 2));
467  __ Mov(w15, Operand(w11, LSR, 3));
468  __ Mov(x18, Operand(x12, LSR, 4));
469  __ Mov(w19, Operand(w11, ASR, 11));
470  __ Mov(x20, Operand(x12, ASR, 12));
471  __ Mov(w21, Operand(w11, ROR, 13));
472  __ Mov(x22, Operand(x12, ROR, 14));
473  __ Mov(w23, Operand(w13, UXTB));
474  __ Mov(x24, Operand(x13, SXTB, 1));
475  __ Mov(w25, Operand(w13, UXTH, 2));
476  __ Mov(x26, Operand(x13, SXTH, 3));
477  __ Mov(x27, Operand(w13, UXTW, 4));
478
479  __ Mov(x28, 0x0123456789abcdef);
480  __ Mov(w28, w28, kDiscardForSameWReg);
481  END();
482
483  RUN();
484
485  ASSERT_EQUAL_64(0x0123456789abcdef, x0);
486  ASSERT_EQUAL_64(0x00000000abcd0000, x1);
487  ASSERT_EQUAL_64(0xffffabcdffffffff, x2);
488  ASSERT_EQUAL_64(0x5432ffffffffffff, x3);
489  ASSERT_EQUAL_64(x4, x5);
490  ASSERT_EQUAL_32(-1, w6);
491  ASSERT_EQUAL_64(0x0123456789abcdef, x7);
492  ASSERT_EQUAL_32(0x89abcdef, w8);
493  ASSERT_EQUAL_64(0x0123456789abcdef, x9);
494  ASSERT_EQUAL_32(0x89abcdef, w10);
495  ASSERT_EQUAL_64(0x00000fff, x11);
496  ASSERT_EQUAL_64(0x0000000000000fff, x12);
497  ASSERT_EQUAL_64(0x00001ffe, x13);
498  ASSERT_EQUAL_64(0x0000000000003ffc, x14);
499  ASSERT_EQUAL_64(0x000001ff, x15);
500  ASSERT_EQUAL_64(0x00000000000000ff, x18);
501  ASSERT_EQUAL_64(0x00000001, x19);
502  ASSERT_EQUAL_64(0x0000000000000000, x20);
503  ASSERT_EQUAL_64(0x7ff80000, x21);
504  ASSERT_EQUAL_64(0x3ffc000000000000, x22);
505  ASSERT_EQUAL_64(0x000000fe, x23);
506  ASSERT_EQUAL_64(0xfffffffffffffffc, x24);
507  ASSERT_EQUAL_64(0x00007ff8, x25);
508  ASSERT_EQUAL_64(0x000000000000fff0, x26);
509  ASSERT_EQUAL_64(0x000000000001ffe0, x27);
510  ASSERT_EQUAL_64(0x0123456789abcdef, x28);
511
512  TEARDOWN();
513}
514
515
516TEST(orr) {
517  SETUP();
518
519  START();
520  __ Mov(x0, 0xf0f0);
521  __ Mov(x1, 0xf00000ff);
522
523  __ Orr(x2, x0, Operand(x1));
524  __ Orr(w3, w0, Operand(w1, LSL, 28));
525  __ Orr(x4, x0, Operand(x1, LSL, 32));
526  __ Orr(x5, x0, Operand(x1, LSR, 4));
527  __ Orr(w6, w0, Operand(w1, ASR, 4));
528  __ Orr(x7, x0, Operand(x1, ASR, 4));
529  __ Orr(w8, w0, Operand(w1, ROR, 12));
530  __ Orr(x9, x0, Operand(x1, ROR, 12));
531  __ Orr(w10, w0, 0xf);
532  __ Orr(x11, x0, 0xf0000000f0000000);
533  END();
534
535  RUN();
536
537  ASSERT_EQUAL_64(0x00000000f000f0ff, x2);
538  ASSERT_EQUAL_64(0xf000f0f0, x3);
539  ASSERT_EQUAL_64(0xf00000ff0000f0f0, x4);
540  ASSERT_EQUAL_64(0x000000000f00f0ff, x5);
541  ASSERT_EQUAL_64(0xff00f0ff, x6);
542  ASSERT_EQUAL_64(0x000000000f00f0ff, x7);
543  ASSERT_EQUAL_64(0x0ffff0f0, x8);
544  ASSERT_EQUAL_64(0x0ff00000000ff0f0, x9);
545  ASSERT_EQUAL_64(0x0000f0ff, x10);
546  ASSERT_EQUAL_64(0xf0000000f000f0f0, x11);
547
548  TEARDOWN();
549}
550
551
552TEST(orr_extend) {
553  SETUP();
554
555  START();
556  __ Mov(x0, 1);
557  __ Mov(x1, 0x8000000080008080);
558  __ Orr(w6, w0, Operand(w1, UXTB));
559  __ Orr(x7, x0, Operand(x1, UXTH, 1));
560  __ Orr(w8, w0, Operand(w1, UXTW, 2));
561  __ Orr(x9, x0, Operand(x1, UXTX, 3));
562  __ Orr(w10, w0, Operand(w1, SXTB));
563  __ Orr(x11, x0, Operand(x1, SXTH, 1));
564  __ Orr(x12, x0, Operand(x1, SXTW, 2));
565  __ Orr(x13, x0, Operand(x1, SXTX, 3));
566  END();
567
568  RUN();
569
570  ASSERT_EQUAL_64(0x00000081, x6);
571  ASSERT_EQUAL_64(0x0000000000010101, x7);
572  ASSERT_EQUAL_64(0x00020201, x8);
573  ASSERT_EQUAL_64(0x0000000400040401, x9);
574  ASSERT_EQUAL_64(0xffffff81, x10);
575  ASSERT_EQUAL_64(0xffffffffffff0101, x11);
576  ASSERT_EQUAL_64(0xfffffffe00020201, x12);
577  ASSERT_EQUAL_64(0x0000000400040401, x13);
578
579  TEARDOWN();
580}
581
582
583TEST(bitwise_wide_imm) {
584  SETUP();
585
586  START();
587  __ Mov(x0, 0);
588  __ Mov(x1, 0xf0f0f0f0f0f0f0f0);
589
590  __ Orr(x10, x0, 0x1234567890abcdef);
591  __ Orr(w11, w1, 0x90abcdef);
592
593  __ Orr(w12, w0, kWMinInt);
594  __ Eor(w13, w0, kWMinInt);
595  END();
596
597  RUN();
598
599  ASSERT_EQUAL_64(0, x0);
600  ASSERT_EQUAL_64(0xf0f0f0f0f0f0f0f0, x1);
601  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
602  ASSERT_EQUAL_64(0x00000000f0fbfdff, x11);
603  ASSERT_EQUAL_32(kWMinInt, w12);
604  ASSERT_EQUAL_32(kWMinInt, w13);
605
606  TEARDOWN();
607}
608
609
610TEST(orn) {
611  SETUP();
612
613  START();
614  __ Mov(x0, 0xf0f0);
615  __ Mov(x1, 0xf00000ff);
616
617  __ Orn(x2, x0, Operand(x1));
618  __ Orn(w3, w0, Operand(w1, LSL, 4));
619  __ Orn(x4, x0, Operand(x1, LSL, 4));
620  __ Orn(x5, x0, Operand(x1, LSR, 1));
621  __ Orn(w6, w0, Operand(w1, ASR, 1));
622  __ Orn(x7, x0, Operand(x1, ASR, 1));
623  __ Orn(w8, w0, Operand(w1, ROR, 16));
624  __ Orn(x9, x0, Operand(x1, ROR, 16));
625  __ Orn(w10, w0, 0x0000ffff);
626  __ Orn(x11, x0, 0x0000ffff0000ffff);
627  END();
628
629  RUN();
630
631  ASSERT_EQUAL_64(0xffffffff0ffffff0, x2);
632  ASSERT_EQUAL_64(0xfffff0ff, x3);
633  ASSERT_EQUAL_64(0xfffffff0fffff0ff, x4);
634  ASSERT_EQUAL_64(0xffffffff87fffff0, x5);
635  ASSERT_EQUAL_64(0x07fffff0, x6);
636  ASSERT_EQUAL_64(0xffffffff87fffff0, x7);
637  ASSERT_EQUAL_64(0xff00ffff, x8);
638  ASSERT_EQUAL_64(0xff00ffffffffffff, x9);
639  ASSERT_EQUAL_64(0xfffff0f0, x10);
640  ASSERT_EQUAL_64(0xffff0000fffff0f0, x11);
641
642  TEARDOWN();
643}
644
645
646TEST(orn_extend) {
647  SETUP();
648
649  START();
650  __ Mov(x0, 1);
651  __ Mov(x1, 0x8000000080008081);
652  __ Orn(w6, w0, Operand(w1, UXTB));
653  __ Orn(x7, x0, Operand(x1, UXTH, 1));
654  __ Orn(w8, w0, Operand(w1, UXTW, 2));
655  __ Orn(x9, x0, Operand(x1, UXTX, 3));
656  __ Orn(w10, w0, Operand(w1, SXTB));
657  __ Orn(x11, x0, Operand(x1, SXTH, 1));
658  __ Orn(x12, x0, Operand(x1, SXTW, 2));
659  __ Orn(x13, x0, Operand(x1, SXTX, 3));
660  END();
661
662  RUN();
663
664  ASSERT_EQUAL_64(0xffffff7f, x6);
665  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
666  ASSERT_EQUAL_64(0xfffdfdfb, x8);
667  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
668  ASSERT_EQUAL_64(0x0000007f, x10);
669  ASSERT_EQUAL_64(0x000000000000fefd, x11);
670  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
671  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);
672
673  TEARDOWN();
674}
675
676
677TEST(and_) {
678  SETUP();
679
680  START();
681  __ Mov(x0, 0xfff0);
682  __ Mov(x1, 0xf00000ff);
683
684  __ And(x2, x0, Operand(x1));
685  __ And(w3, w0, Operand(w1, LSL, 4));
686  __ And(x4, x0, Operand(x1, LSL, 4));
687  __ And(x5, x0, Operand(x1, LSR, 1));
688  __ And(w6, w0, Operand(w1, ASR, 20));
689  __ And(x7, x0, Operand(x1, ASR, 20));
690  __ And(w8, w0, Operand(w1, ROR, 28));
691  __ And(x9, x0, Operand(x1, ROR, 28));
692  __ And(w10, w0, Operand(0xff00));
693  __ And(x11, x0, Operand(0xff));
694  END();
695
696  RUN();
697
698  ASSERT_EQUAL_64(0x000000f0, x2);
699  ASSERT_EQUAL_64(0x00000ff0, x3);
700  ASSERT_EQUAL_64(0x00000ff0, x4);
701  ASSERT_EQUAL_64(0x00000070, x5);
702  ASSERT_EQUAL_64(0x0000ff00, x6);
703  ASSERT_EQUAL_64(0x00000f00, x7);
704  ASSERT_EQUAL_64(0x00000ff0, x8);
705  ASSERT_EQUAL_64(0x00000000, x9);
706  ASSERT_EQUAL_64(0x0000ff00, x10);
707  ASSERT_EQUAL_64(0x000000f0, x11);
708
709  TEARDOWN();
710}
711
712
713TEST(and_extend) {
714  SETUP();
715
716  START();
717  __ Mov(x0, 0xffffffffffffffff);
718  __ Mov(x1, 0x8000000080008081);
719  __ And(w6, w0, Operand(w1, UXTB));
720  __ And(x7, x0, Operand(x1, UXTH, 1));
721  __ And(w8, w0, Operand(w1, UXTW, 2));
722  __ And(x9, x0, Operand(x1, UXTX, 3));
723  __ And(w10, w0, Operand(w1, SXTB));
724  __ And(x11, x0, Operand(x1, SXTH, 1));
725  __ And(x12, x0, Operand(x1, SXTW, 2));
726  __ And(x13, x0, Operand(x1, SXTX, 3));
727  END();
728
729  RUN();
730
731  ASSERT_EQUAL_64(0x00000081, x6);
732  ASSERT_EQUAL_64(0x0000000000010102, x7);
733  ASSERT_EQUAL_64(0x00020204, x8);
734  ASSERT_EQUAL_64(0x0000000400040408, x9);
735  ASSERT_EQUAL_64(0xffffff81, x10);
736  ASSERT_EQUAL_64(0xffffffffffff0102, x11);
737  ASSERT_EQUAL_64(0xfffffffe00020204, x12);
738  ASSERT_EQUAL_64(0x0000000400040408, x13);
739
740  TEARDOWN();
741}
742
743
744TEST(ands) {
745  SETUP();
746
747  START();
748  __ Mov(x1, 0xf00000ff);
749  __ Ands(w0, w1, Operand(w1));
750  END();
751
752  RUN();
753
754  ASSERT_EQUAL_NZCV(NFlag);
755  ASSERT_EQUAL_64(0xf00000ff, x0);
756
757  START();
758  __ Mov(x0, 0xfff0);
759  __ Mov(x1, 0xf00000ff);
760  __ Ands(w0, w0, Operand(w1, LSR, 4));
761  END();
762
763  RUN();
764
765  ASSERT_EQUAL_NZCV(ZFlag);
766  ASSERT_EQUAL_64(0x00000000, x0);
767
768  START();
769  __ Mov(x0, 0x8000000000000000);
770  __ Mov(x1, 0x00000001);
771  __ Ands(x0, x0, Operand(x1, ROR, 1));
772  END();
773
774  RUN();
775
776  ASSERT_EQUAL_NZCV(NFlag);
777  ASSERT_EQUAL_64(0x8000000000000000, x0);
778
779  START();
780  __ Mov(x0, 0xfff0);
781  __ Ands(w0, w0, Operand(0xf));
782  END();
783
784  RUN();
785
786  ASSERT_EQUAL_NZCV(ZFlag);
787  ASSERT_EQUAL_64(0x00000000, x0);
788
789  START();
790  __ Mov(x0, 0xff000000);
791  __ Ands(w0, w0, Operand(0x80000000));
792  END();
793
794  RUN();
795
796  ASSERT_EQUAL_NZCV(NFlag);
797  ASSERT_EQUAL_64(0x80000000, x0);
798
799  TEARDOWN();
800}
801
802
803TEST(bic) {
804  SETUP();
805
806  START();
807  __ Mov(x0, 0xfff0);
808  __ Mov(x1, 0xf00000ff);
809
810  __ Bic(x2, x0, Operand(x1));
811  __ Bic(w3, w0, Operand(w1, LSL, 4));
812  __ Bic(x4, x0, Operand(x1, LSL, 4));
813  __ Bic(x5, x0, Operand(x1, LSR, 1));
814  __ Bic(w6, w0, Operand(w1, ASR, 20));
815  __ Bic(x7, x0, Operand(x1, ASR, 20));
816  __ Bic(w8, w0, Operand(w1, ROR, 28));
817  __ Bic(x9, x0, Operand(x1, ROR, 24));
818  __ Bic(x10, x0, Operand(0x1f));
819  __ Bic(x11, x0, Operand(0x100));
820
821  // Test bic into sp when the constant cannot be encoded in the immediate
822  // field.
823  // Use x20 to preserve sp. We check for the result via x21 because the
824  // test infrastructure requires that sp be restored to its original value.
825  __ Mov(x20, sp);
826  __ Mov(x0, 0xffffff);
827  __ Bic(sp, x0, Operand(0xabcdef));
828  __ Mov(x21, sp);
829  __ Mov(sp, x20);
830  END();
831
832  RUN();
833
834  ASSERT_EQUAL_64(0x0000ff00, x2);
835  ASSERT_EQUAL_64(0x0000f000, x3);
836  ASSERT_EQUAL_64(0x0000f000, x4);
837  ASSERT_EQUAL_64(0x0000ff80, x5);
838  ASSERT_EQUAL_64(0x000000f0, x6);
839  ASSERT_EQUAL_64(0x0000f0f0, x7);
840  ASSERT_EQUAL_64(0x0000f000, x8);
841  ASSERT_EQUAL_64(0x0000ff00, x9);
842  ASSERT_EQUAL_64(0x0000ffe0, x10);
843  ASSERT_EQUAL_64(0x0000fef0, x11);
844
845  ASSERT_EQUAL_64(0x543210, x21);
846
847  TEARDOWN();
848}
849
850
851TEST(bic_extend) {
852  SETUP();
853
854  START();
855  __ Mov(x0, 0xffffffffffffffff);
856  __ Mov(x1, 0x8000000080008081);
857  __ Bic(w6, w0, Operand(w1, UXTB));
858  __ Bic(x7, x0, Operand(x1, UXTH, 1));
859  __ Bic(w8, w0, Operand(w1, UXTW, 2));
860  __ Bic(x9, x0, Operand(x1, UXTX, 3));
861  __ Bic(w10, w0, Operand(w1, SXTB));
862  __ Bic(x11, x0, Operand(x1, SXTH, 1));
863  __ Bic(x12, x0, Operand(x1, SXTW, 2));
864  __ Bic(x13, x0, Operand(x1, SXTX, 3));
865  END();
866
867  RUN();
868
869  ASSERT_EQUAL_64(0xffffff7e, x6);
870  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
871  ASSERT_EQUAL_64(0xfffdfdfb, x8);
872  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
873  ASSERT_EQUAL_64(0x0000007e, x10);
874  ASSERT_EQUAL_64(0x000000000000fefd, x11);
875  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
876  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);
877
878  TEARDOWN();
879}
880
881
882TEST(bics) {
883  SETUP();
884
885  START();
886  __ Mov(x1, 0xffff);
887  __ Bics(w0, w1, Operand(w1));
888  END();
889
890  RUN();
891
892  ASSERT_EQUAL_NZCV(ZFlag);
893  ASSERT_EQUAL_64(0x00000000, x0);
894
895  START();
896  __ Mov(x0, 0xffffffff);
897  __ Bics(w0, w0, Operand(w0, LSR, 1));
898  END();
899
900  RUN();
901
902  ASSERT_EQUAL_NZCV(NFlag);
903  ASSERT_EQUAL_64(0x80000000, x0);
904
905  START();
906  __ Mov(x0, 0x8000000000000000);
907  __ Mov(x1, 0x00000001);
908  __ Bics(x0, x0, Operand(x1, ROR, 1));
909  END();
910
911  RUN();
912
913  ASSERT_EQUAL_NZCV(ZFlag);
914  ASSERT_EQUAL_64(0x00000000, x0);
915
916  START();
917  __ Mov(x0, 0xffffffffffffffff);
918  __ Bics(x0, x0, 0x7fffffffffffffff);
919  END();
920
921  RUN();
922
923  ASSERT_EQUAL_NZCV(NFlag);
924  ASSERT_EQUAL_64(0x8000000000000000, x0);
925
926  START();
927  __ Mov(w0, 0xffff0000);
928  __ Bics(w0, w0, 0xfffffff0);
929  END();
930
931  RUN();
932
933  ASSERT_EQUAL_NZCV(ZFlag);
934  ASSERT_EQUAL_64(0x00000000, x0);
935
936  TEARDOWN();
937}
938
939
940TEST(eor) {
941  SETUP();
942
943  START();
944  __ Mov(x0, 0xfff0);
945  __ Mov(x1, 0xf00000ff);
946
947  __ Eor(x2, x0, Operand(x1));
948  __ Eor(w3, w0, Operand(w1, LSL, 4));
949  __ Eor(x4, x0, Operand(x1, LSL, 4));
950  __ Eor(x5, x0, Operand(x1, LSR, 1));
951  __ Eor(w6, w0, Operand(w1, ASR, 20));
952  __ Eor(x7, x0, Operand(x1, ASR, 20));
953  __ Eor(w8, w0, Operand(w1, ROR, 28));
954  __ Eor(x9, x0, Operand(x1, ROR, 28));
955  __ Eor(w10, w0, 0xff00ff00);
956  __ Eor(x11, x0, 0xff00ff00ff00ff00);
957  END();
958
959  RUN();
960
961  ASSERT_EQUAL_64(0x00000000f000ff0f, x2);
962  ASSERT_EQUAL_64(0x0000f000, x3);
963  ASSERT_EQUAL_64(0x0000000f0000f000, x4);
964  ASSERT_EQUAL_64(0x000000007800ff8f, x5);
965  ASSERT_EQUAL_64(0xffff00f0, x6);
966  ASSERT_EQUAL_64(0x000000000000f0f0, x7);
967  ASSERT_EQUAL_64(0x0000f00f, x8);
968  ASSERT_EQUAL_64(0x00000ff00000ffff, x9);
969  ASSERT_EQUAL_64(0xff0000f0, x10);
970  ASSERT_EQUAL_64(0xff00ff00ff0000f0, x11);
971
972  TEARDOWN();
973}
974
975TEST(eor_extend) {
976  SETUP();
977
978  START();
979  __ Mov(x0, 0x1111111111111111);
980  __ Mov(x1, 0x8000000080008081);
981  __ Eor(w6, w0, Operand(w1, UXTB));
982  __ Eor(x7, x0, Operand(x1, UXTH, 1));
983  __ Eor(w8, w0, Operand(w1, UXTW, 2));
984  __ Eor(x9, x0, Operand(x1, UXTX, 3));
985  __ Eor(w10, w0, Operand(w1, SXTB));
986  __ Eor(x11, x0, Operand(x1, SXTH, 1));
987  __ Eor(x12, x0, Operand(x1, SXTW, 2));
988  __ Eor(x13, x0, Operand(x1, SXTX, 3));
989  END();
990
991  RUN();
992
993  ASSERT_EQUAL_64(0x11111190, x6);
994  ASSERT_EQUAL_64(0x1111111111101013, x7);
995  ASSERT_EQUAL_64(0x11131315, x8);
996  ASSERT_EQUAL_64(0x1111111511151519, x9);
997  ASSERT_EQUAL_64(0xeeeeee90, x10);
998  ASSERT_EQUAL_64(0xeeeeeeeeeeee1013, x11);
999  ASSERT_EQUAL_64(0xeeeeeeef11131315, x12);
1000  ASSERT_EQUAL_64(0x1111111511151519, x13);
1001
1002  TEARDOWN();
1003}
1004
1005
1006TEST(eon) {
1007  SETUP();
1008
1009  START();
1010  __ Mov(x0, 0xfff0);
1011  __ Mov(x1, 0xf00000ff);
1012
1013  __ Eon(x2, x0, Operand(x1));
1014  __ Eon(w3, w0, Operand(w1, LSL, 4));
1015  __ Eon(x4, x0, Operand(x1, LSL, 4));
1016  __ Eon(x5, x0, Operand(x1, LSR, 1));
1017  __ Eon(w6, w0, Operand(w1, ASR, 20));
1018  __ Eon(x7, x0, Operand(x1, ASR, 20));
1019  __ Eon(w8, w0, Operand(w1, ROR, 28));
1020  __ Eon(x9, x0, Operand(x1, ROR, 28));
1021  __ Eon(w10, w0, 0x03c003c0);
1022  __ Eon(x11, x0, 0x0000100000001000);
1023  END();
1024
1025  RUN();
1026
1027  ASSERT_EQUAL_64(0xffffffff0fff00f0, x2);
1028  ASSERT_EQUAL_64(0xffff0fff, x3);
1029  ASSERT_EQUAL_64(0xfffffff0ffff0fff, x4);
1030  ASSERT_EQUAL_64(0xffffffff87ff0070, x5);
1031  ASSERT_EQUAL_64(0x0000ff0f, x6);
1032  ASSERT_EQUAL_64(0xffffffffffff0f0f, x7);
1033  ASSERT_EQUAL_64(0xffff0ff0, x8);
1034  ASSERT_EQUAL_64(0xfffff00fffff0000, x9);
1035  ASSERT_EQUAL_64(0xfc3f03cf, x10);
1036  ASSERT_EQUAL_64(0xffffefffffff100f, x11);
1037
1038  TEARDOWN();
1039}
1040
1041
1042TEST(eon_extend) {
1043  SETUP();
1044
1045  START();
1046  __ Mov(x0, 0x1111111111111111);
1047  __ Mov(x1, 0x8000000080008081);
1048  __ Eon(w6, w0, Operand(w1, UXTB));
1049  __ Eon(x7, x0, Operand(x1, UXTH, 1));
1050  __ Eon(w8, w0, Operand(w1, UXTW, 2));
1051  __ Eon(x9, x0, Operand(x1, UXTX, 3));
1052  __ Eon(w10, w0, Operand(w1, SXTB));
1053  __ Eon(x11, x0, Operand(x1, SXTH, 1));
1054  __ Eon(x12, x0, Operand(x1, SXTW, 2));
1055  __ Eon(x13, x0, Operand(x1, SXTX, 3));
1056  END();
1057
1058  RUN();
1059
1060  ASSERT_EQUAL_64(0xeeeeee6f, x6);
1061  ASSERT_EQUAL_64(0xeeeeeeeeeeefefec, x7);
1062  ASSERT_EQUAL_64(0xeeececea, x8);
1063  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x9);
1064  ASSERT_EQUAL_64(0x1111116f, x10);
1065  ASSERT_EQUAL_64(0x111111111111efec, x11);
1066  ASSERT_EQUAL_64(0x11111110eeececea, x12);
1067  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x13);
1068
1069  TEARDOWN();
1070}
1071
1072
1073TEST(mul) {
1074  SETUP();
1075
1076  START();
1077  __ Mov(x25, 0);
1078  __ Mov(x26, 1);
1079  __ Mov(x18, 0xffffffff);
1080  __ Mov(x19, 0xffffffffffffffff);
1081
1082  __ Mul(w0, w25, w25);
1083  __ Mul(w1, w25, w26);
1084  __ Mul(w2, w26, w18);
1085  __ Mul(w3, w18, w19);
1086  __ Mul(x4, x25, x25);
1087  __ Mul(x5, x26, x18);
1088  __ Mul(x6, x18, x19);
1089  __ Mul(x7, x19, x19);
1090  __ Smull(x8, w26, w18);
1091  __ Smull(x9, w18, w18);
1092  __ Smull(x10, w19, w19);
1093  __ Mneg(w11, w25, w25);
1094  __ Mneg(w12, w25, w26);
1095  __ Mneg(w13, w26, w18);
1096  __ Mneg(w14, w18, w19);
1097  __ Mneg(x20, x25, x25);
1098  __ Mneg(x21, x26, x18);
1099  __ Mneg(x22, x18, x19);
1100  __ Mneg(x23, x19, x19);
1101  END();
1102
1103  RUN();
1104
1105  ASSERT_EQUAL_64(0, x0);
1106  ASSERT_EQUAL_64(0, x1);
1107  ASSERT_EQUAL_64(0xffffffff, x2);
1108  ASSERT_EQUAL_64(1, x3);
1109  ASSERT_EQUAL_64(0, x4);
1110  ASSERT_EQUAL_64(0xffffffff, x5);
1111  ASSERT_EQUAL_64(0xffffffff00000001, x6);
1112  ASSERT_EQUAL_64(1, x7);
1113  ASSERT_EQUAL_64(0xffffffffffffffff, x8);
1114  ASSERT_EQUAL_64(1, x9);
1115  ASSERT_EQUAL_64(1, x10);
1116  ASSERT_EQUAL_64(0, x11);
1117  ASSERT_EQUAL_64(0, x12);
1118  ASSERT_EQUAL_64(1, x13);
1119  ASSERT_EQUAL_64(0xffffffff, x14);
1120  ASSERT_EQUAL_64(0, x20);
1121  ASSERT_EQUAL_64(0xffffffff00000001, x21);
1122  ASSERT_EQUAL_64(0xffffffff, x22);
1123  ASSERT_EQUAL_64(0xffffffffffffffff, x23);
1124
1125  TEARDOWN();
1126}
1127
1128
1129static void SmullHelper(int64_t expected, int64_t a, int64_t b) {
1130  SETUP();
1131  START();
1132  __ Mov(w0, a);
1133  __ Mov(w1, b);
1134  __ Smull(x2, w0, w1);
1135  END();
1136  RUN();
1137  ASSERT_EQUAL_64(expected, x2);
1138  TEARDOWN();
1139}
1140
1141
1142TEST(smull) {
1143  SmullHelper(0, 0, 0);
1144  SmullHelper(1, 1, 1);
1145  SmullHelper(-1, -1, 1);
1146  SmullHelper(1, -1, -1);
1147  SmullHelper(0xffffffff80000000, 0x80000000, 1);
1148  SmullHelper(0x0000000080000000, 0x00010000, 0x00008000);
1149}
1150
1151
1152TEST(madd) {
1153  SETUP();
1154
1155  START();
1156  __ Mov(x16, 0);
1157  __ Mov(x17, 1);
1158  __ Mov(x18, 0xffffffff);
1159  __ Mov(x19, 0xffffffffffffffff);
1160
1161  __ Madd(w0, w16, w16, w16);
1162  __ Madd(w1, w16, w16, w17);
1163  __ Madd(w2, w16, w16, w18);
1164  __ Madd(w3, w16, w16, w19);
1165  __ Madd(w4, w16, w17, w17);
1166  __ Madd(w5, w17, w17, w18);
1167  __ Madd(w6, w17, w17, w19);
1168  __ Madd(w7, w17, w18, w16);
1169  __ Madd(w8, w17, w18, w18);
1170  __ Madd(w9, w18, w18, w17);
1171  __ Madd(w10, w18, w19, w18);
1172  __ Madd(w11, w19, w19, w19);
1173
1174  __ Madd(x12, x16, x16, x16);
1175  __ Madd(x13, x16, x16, x17);
1176  __ Madd(x14, x16, x16, x18);
1177  __ Madd(x15, x16, x16, x19);
1178  __ Madd(x20, x16, x17, x17);
1179  __ Madd(x21, x17, x17, x18);
1180  __ Madd(x22, x17, x17, x19);
1181  __ Madd(x23, x17, x18, x16);
1182  __ Madd(x24, x17, x18, x18);
1183  __ Madd(x25, x18, x18, x17);
1184  __ Madd(x26, x18, x19, x18);
1185  __ Madd(x27, x19, x19, x19);
1186
1187  END();
1188
1189  RUN();
1190
1191  ASSERT_EQUAL_64(0, x0);
1192  ASSERT_EQUAL_64(1, x1);
1193  ASSERT_EQUAL_64(0xffffffff, x2);
1194  ASSERT_EQUAL_64(0xffffffff, x3);
1195  ASSERT_EQUAL_64(1, x4);
1196  ASSERT_EQUAL_64(0, x5);
1197  ASSERT_EQUAL_64(0, x6);
1198  ASSERT_EQUAL_64(0xffffffff, x7);
1199  ASSERT_EQUAL_64(0xfffffffe, x8);
1200  ASSERT_EQUAL_64(2, x9);
1201  ASSERT_EQUAL_64(0, x10);
1202  ASSERT_EQUAL_64(0, x11);
1203
1204  ASSERT_EQUAL_64(0, x12);
1205  ASSERT_EQUAL_64(1, x13);
1206  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
1207  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
1208  ASSERT_EQUAL_64(1, x20);
1209  ASSERT_EQUAL_64(0x0000000100000000, x21);
1210  ASSERT_EQUAL_64(0, x22);
1211  ASSERT_EQUAL_64(0x00000000ffffffff, x23);
1212  ASSERT_EQUAL_64(0x00000001fffffffe, x24);
1213  ASSERT_EQUAL_64(0xfffffffe00000002, x25);
1214  ASSERT_EQUAL_64(0, x26);
1215  ASSERT_EQUAL_64(0, x27);
1216
1217  TEARDOWN();
1218}
1219
1220
1221TEST(msub) {
1222  SETUP();
1223
1224  START();
1225  __ Mov(x16, 0);
1226  __ Mov(x17, 1);
1227  __ Mov(x18, 0xffffffff);
1228  __ Mov(x19, 0xffffffffffffffff);
1229
1230  __ Msub(w0, w16, w16, w16);
1231  __ Msub(w1, w16, w16, w17);
1232  __ Msub(w2, w16, w16, w18);
1233  __ Msub(w3, w16, w16, w19);
1234  __ Msub(w4, w16, w17, w17);
1235  __ Msub(w5, w17, w17, w18);
1236  __ Msub(w6, w17, w17, w19);
1237  __ Msub(w7, w17, w18, w16);
1238  __ Msub(w8, w17, w18, w18);
1239  __ Msub(w9, w18, w18, w17);
1240  __ Msub(w10, w18, w19, w18);
1241  __ Msub(w11, w19, w19, w19);
1242
1243  __ Msub(x12, x16, x16, x16);
1244  __ Msub(x13, x16, x16, x17);
1245  __ Msub(x14, x16, x16, x18);
1246  __ Msub(x15, x16, x16, x19);
1247  __ Msub(x20, x16, x17, x17);
1248  __ Msub(x21, x17, x17, x18);
1249  __ Msub(x22, x17, x17, x19);
1250  __ Msub(x23, x17, x18, x16);
1251  __ Msub(x24, x17, x18, x18);
1252  __ Msub(x25, x18, x18, x17);
1253  __ Msub(x26, x18, x19, x18);
1254  __ Msub(x27, x19, x19, x19);
1255
1256  END();
1257
1258  RUN();
1259
1260  ASSERT_EQUAL_64(0, x0);
1261  ASSERT_EQUAL_64(1, x1);
1262  ASSERT_EQUAL_64(0xffffffff, x2);
1263  ASSERT_EQUAL_64(0xffffffff, x3);
1264  ASSERT_EQUAL_64(1, x4);
1265  ASSERT_EQUAL_64(0xfffffffe, x5);
1266  ASSERT_EQUAL_64(0xfffffffe, x6);
1267  ASSERT_EQUAL_64(1, x7);
1268  ASSERT_EQUAL_64(0, x8);
1269  ASSERT_EQUAL_64(0, x9);
1270  ASSERT_EQUAL_64(0xfffffffe, x10);
1271  ASSERT_EQUAL_64(0xfffffffe, x11);
1272
1273  ASSERT_EQUAL_64(0, x12);
1274  ASSERT_EQUAL_64(1, x13);
1275  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
1276  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
1277  ASSERT_EQUAL_64(1, x20);
1278  ASSERT_EQUAL_64(0x00000000fffffffe, x21);
1279  ASSERT_EQUAL_64(0xfffffffffffffffe, x22);
1280  ASSERT_EQUAL_64(0xffffffff00000001, x23);
1281  ASSERT_EQUAL_64(0, x24);
1282  ASSERT_EQUAL_64(0x0000000200000000, x25);
1283  ASSERT_EQUAL_64(0x00000001fffffffe, x26);
1284  ASSERT_EQUAL_64(0xfffffffffffffffe, x27);
1285
1286  TEARDOWN();
1287}
1288
1289
1290TEST(smulh) {
1291  SETUP();
1292
1293  START();
1294  __ Mov(x20, 0);
1295  __ Mov(x21, 1);
1296  __ Mov(x22, 0x0000000100000000);
1297  __ Mov(x23, 0x0000000012345678);
1298  __ Mov(x24, 0x0123456789abcdef);
1299  __ Mov(x25, 0x0000000200000000);
1300  __ Mov(x26, 0x8000000000000000);
1301  __ Mov(x27, 0xffffffffffffffff);
1302  __ Mov(x28, 0x5555555555555555);
1303  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);
1304
1305  __ Smulh(x0, x20, x24);
1306  __ Smulh(x1, x21, x24);
1307  __ Smulh(x2, x22, x23);
1308  __ Smulh(x3, x22, x24);
1309  __ Smulh(x4, x24, x25);
1310  __ Smulh(x5, x23, x27);
1311  __ Smulh(x6, x26, x26);
1312  __ Smulh(x7, x26, x27);
1313  __ Smulh(x8, x27, x27);
1314  __ Smulh(x9, x28, x28);
1315  __ Smulh(x10, x28, x29);
1316  __ Smulh(x11, x29, x29);
1317  END();
1318
1319  RUN();
1320
1321  ASSERT_EQUAL_64(0, x0);
1322  ASSERT_EQUAL_64(0, x1);
1323  ASSERT_EQUAL_64(0, x2);
1324  ASSERT_EQUAL_64(0x0000000001234567, x3);
1325  ASSERT_EQUAL_64(0x0000000002468acf, x4);
1326  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
1327  ASSERT_EQUAL_64(0x4000000000000000, x6);
1328  ASSERT_EQUAL_64(0, x7);
1329  ASSERT_EQUAL_64(0, x8);
1330  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
1331  ASSERT_EQUAL_64(0xe38e38e38e38e38e, x10);
1332  ASSERT_EQUAL_64(0x1c71c71c71c71c72, x11);
1333
1334  TEARDOWN();
1335}
1336
1337
1338TEST(umulh) {
1339  SETUP();
1340
1341  START();
1342  __ Mov(x20, 0);
1343  __ Mov(x21, 1);
1344  __ Mov(x22, 0x0000000100000000);
1345  __ Mov(x23, 0x0000000012345678);
1346  __ Mov(x24, 0x0123456789abcdef);
1347  __ Mov(x25, 0x0000000200000000);
1348  __ Mov(x26, 0x8000000000000000);
1349  __ Mov(x27, 0xffffffffffffffff);
1350  __ Mov(x28, 0x5555555555555555);
1351  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);
1352
1353  __ Umulh(x0, x20, x24);
1354  __ Umulh(x1, x21, x24);
1355  __ Umulh(x2, x22, x23);
1356  __ Umulh(x3, x22, x24);
1357  __ Umulh(x4, x24, x25);
1358  __ Umulh(x5, x23, x27);
1359  __ Umulh(x6, x26, x26);
1360  __ Umulh(x7, x26, x27);
1361  __ Umulh(x8, x27, x27);
1362  __ Umulh(x9, x28, x28);
1363  __ Umulh(x10, x28, x29);
1364  __ Umulh(x11, x29, x29);
1365  END();
1366
1367  RUN();
1368
1369  ASSERT_EQUAL_64(0, x0);
1370  ASSERT_EQUAL_64(0, x1);
1371  ASSERT_EQUAL_64(0, x2);
1372  ASSERT_EQUAL_64(0x0000000001234567, x3);
1373  ASSERT_EQUAL_64(0x0000000002468acf, x4);
1374  ASSERT_EQUAL_64(0x0000000012345677, x5);
1375  ASSERT_EQUAL_64(0x4000000000000000, x6);
1376  ASSERT_EQUAL_64(0x7fffffffffffffff, x7);
1377  ASSERT_EQUAL_64(0xfffffffffffffffe, x8);
1378  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
1379  ASSERT_EQUAL_64(0x38e38e38e38e38e3, x10);
1380  ASSERT_EQUAL_64(0x71c71c71c71c71c6, x11);
1381
1382  TEARDOWN();
1383}
1384
1385
1386TEST(smaddl_umaddl_umull) {
1387  SETUP();
1388
1389  START();
1390  __ Mov(x17, 1);
1391  __ Mov(x18, 0x00000000ffffffff);
1392  __ Mov(x19, 0xffffffffffffffff);
1393  __ Mov(x20, 4);
1394  __ Mov(x21, 0x0000000200000000);
1395
1396  __ Smaddl(x9, w17, w18, x20);
1397  __ Smaddl(x10, w18, w18, x20);
1398  __ Smaddl(x11, w19, w19, x20);
1399  __ Smaddl(x12, w19, w19, x21);
1400  __ Umaddl(x13, w17, w18, x20);
1401  __ Umaddl(x14, w18, w18, x20);
1402  __ Umaddl(x15, w19, w19, x20);
1403  __ Umaddl(x22, w19, w19, x21);
1404  __ Umull(x24, w19, w19);
1405  __ Umull(x25, w17, w18);
1406  END();
1407
1408  RUN();
1409
1410  ASSERT_EQUAL_64(3, x9);
1411  ASSERT_EQUAL_64(5, x10);
1412  ASSERT_EQUAL_64(5, x11);
1413  ASSERT_EQUAL_64(0x0000000200000001, x12);
1414  ASSERT_EQUAL_64(0x0000000100000003, x13);
1415  ASSERT_EQUAL_64(0xfffffffe00000005, x14);
1416  ASSERT_EQUAL_64(0xfffffffe00000005, x15);
1417  ASSERT_EQUAL_64(1, x22);
1418  ASSERT_EQUAL_64(0xfffffffe00000001, x24);
1419  ASSERT_EQUAL_64(0x00000000ffffffff, x25);
1420
1421  TEARDOWN();
1422}
1423
1424
1425TEST(smsubl_umsubl) {
1426  SETUP();
1427
1428  START();
1429  __ Mov(x17, 1);
1430  __ Mov(x18, 0x00000000ffffffff);
1431  __ Mov(x19, 0xffffffffffffffff);
1432  __ Mov(x20, 4);
1433  __ Mov(x21, 0x0000000200000000);
1434
1435  __ Smsubl(x9, w17, w18, x20);
1436  __ Smsubl(x10, w18, w18, x20);
1437  __ Smsubl(x11, w19, w19, x20);
1438  __ Smsubl(x12, w19, w19, x21);
1439  __ Umsubl(x13, w17, w18, x20);
1440  __ Umsubl(x14, w18, w18, x20);
1441  __ Umsubl(x15, w19, w19, x20);
1442  __ Umsubl(x22, w19, w19, x21);
1443  END();
1444
1445  RUN();
1446
1447  ASSERT_EQUAL_64(5, x9);
1448  ASSERT_EQUAL_64(3, x10);
1449  ASSERT_EQUAL_64(3, x11);
1450  ASSERT_EQUAL_64(0x00000001ffffffff, x12);
1451  ASSERT_EQUAL_64(0xffffffff00000005, x13);
1452  ASSERT_EQUAL_64(0x0000000200000003, x14);
1453  ASSERT_EQUAL_64(0x0000000200000003, x15);
1454  ASSERT_EQUAL_64(0x00000003ffffffff, x22);
1455
1456  TEARDOWN();
1457}
1458
1459
1460TEST(div) {
1461  SETUP();
1462
1463  START();
1464  __ Mov(x16, 1);
1465  __ Mov(x17, 0xffffffff);
1466  __ Mov(x18, 0xffffffffffffffff);
1467  __ Mov(x19, 0x80000000);
1468  __ Mov(x20, 0x8000000000000000);
1469  __ Mov(x21, 2);
1470
1471  __ Udiv(w0, w16, w16);
1472  __ Udiv(w1, w17, w16);
1473  __ Sdiv(w2, w16, w16);
1474  __ Sdiv(w3, w16, w17);
1475  __ Sdiv(w4, w17, w18);
1476
1477  __ Udiv(x5, x16, x16);
1478  __ Udiv(x6, x17, x18);
1479  __ Sdiv(x7, x16, x16);
1480  __ Sdiv(x8, x16, x17);
1481  __ Sdiv(x9, x17, x18);
1482
1483  __ Udiv(w10, w19, w21);
1484  __ Sdiv(w11, w19, w21);
1485  __ Udiv(x12, x19, x21);
1486  __ Sdiv(x13, x19, x21);
1487  __ Udiv(x14, x20, x21);
1488  __ Sdiv(x15, x20, x21);
1489
1490  __ Udiv(w22, w19, w17);
1491  __ Sdiv(w23, w19, w17);
1492  __ Udiv(x24, x20, x18);
1493  __ Sdiv(x25, x20, x18);
1494
1495  __ Udiv(x26, x16, x21);
1496  __ Sdiv(x27, x16, x21);
1497  __ Udiv(x28, x18, x21);
1498  __ Sdiv(x29, x18, x21);
1499
1500  __ Mov(x17, 0);
1501  __ Udiv(w18, w16, w17);
1502  __ Sdiv(w19, w16, w17);
1503  __ Udiv(x20, x16, x17);
1504  __ Sdiv(x21, x16, x17);
1505  END();
1506
1507  RUN();
1508
1509  ASSERT_EQUAL_64(1, x0);
1510  ASSERT_EQUAL_64(0xffffffff, x1);
1511  ASSERT_EQUAL_64(1, x2);
1512  ASSERT_EQUAL_64(0xffffffff, x3);
1513  ASSERT_EQUAL_64(1, x4);
1514  ASSERT_EQUAL_64(1, x5);
1515  ASSERT_EQUAL_64(0, x6);
1516  ASSERT_EQUAL_64(1, x7);
1517  ASSERT_EQUAL_64(0, x8);
1518  ASSERT_EQUAL_64(0xffffffff00000001, x9);
1519  ASSERT_EQUAL_64(0x40000000, x10);
1520  ASSERT_EQUAL_64(0xc0000000, x11);
1521  ASSERT_EQUAL_64(0x0000000040000000, x12);
1522  ASSERT_EQUAL_64(0x0000000040000000, x13);
1523  ASSERT_EQUAL_64(0x4000000000000000, x14);
1524  ASSERT_EQUAL_64(0xc000000000000000, x15);
1525  ASSERT_EQUAL_64(0, x22);
1526  ASSERT_EQUAL_64(0x80000000, x23);
1527  ASSERT_EQUAL_64(0, x24);
1528  ASSERT_EQUAL_64(0x8000000000000000, x25);
1529  ASSERT_EQUAL_64(0, x26);
1530  ASSERT_EQUAL_64(0, x27);
1531  ASSERT_EQUAL_64(0x7fffffffffffffff, x28);
1532  ASSERT_EQUAL_64(0, x29);
1533  ASSERT_EQUAL_64(0, x18);
1534  ASSERT_EQUAL_64(0, x19);
1535  ASSERT_EQUAL_64(0, x20);
1536  ASSERT_EQUAL_64(0, x21);
1537
1538  TEARDOWN();
1539}
1540
1541
1542TEST(rbit_rev) {
1543  SETUP();
1544
1545  START();
1546  __ Mov(x24, 0xfedcba9876543210);
1547  __ Rbit(w0, w24);
1548  __ Rbit(x1, x24);
1549  __ Rev16(w2, w24);
1550  __ Rev16(x3, x24);
1551  __ Rev(w4, w24);
1552  __ Rev32(x5, x24);
1553  __ Rev(x6, x24);
1554  END();
1555
1556  RUN();
1557
1558  ASSERT_EQUAL_64(0x084c2a6e, x0);
1559  ASSERT_EQUAL_64(0x084c2a6e195d3b7f, x1);
1560  ASSERT_EQUAL_64(0x54761032, x2);
1561  ASSERT_EQUAL_64(0xdcfe98ba54761032, x3);
1562  ASSERT_EQUAL_64(0x10325476, x4);
1563  ASSERT_EQUAL_64(0x98badcfe10325476, x5);
1564  ASSERT_EQUAL_64(0x1032547698badcfe, x6);
1565
1566  TEARDOWN();
1567}
1568
1569
1570TEST(clz_cls) {
1571  SETUP();
1572
1573  START();
1574  __ Mov(x24, 0x0008000000800000);
1575  __ Mov(x25, 0xff800000fff80000);
1576  __ Mov(x26, 0);
1577  __ Clz(w0, w24);
1578  __ Clz(x1, x24);
1579  __ Clz(w2, w25);
1580  __ Clz(x3, x25);
1581  __ Clz(w4, w26);
1582  __ Clz(x5, x26);
1583  __ Cls(w6, w24);
1584  __ Cls(x7, x24);
1585  __ Cls(w8, w25);
1586  __ Cls(x9, x25);
1587  __ Cls(w10, w26);
1588  __ Cls(x11, x26);
1589  END();
1590
1591  RUN();
1592
1593  ASSERT_EQUAL_64(8, x0);
1594  ASSERT_EQUAL_64(12, x1);
1595  ASSERT_EQUAL_64(0, x2);
1596  ASSERT_EQUAL_64(0, x3);
1597  ASSERT_EQUAL_64(32, x4);
1598  ASSERT_EQUAL_64(64, x5);
1599  ASSERT_EQUAL_64(7, x6);
1600  ASSERT_EQUAL_64(11, x7);
1601  ASSERT_EQUAL_64(12, x8);
1602  ASSERT_EQUAL_64(8, x9);
1603  ASSERT_EQUAL_64(31, x10);
1604  ASSERT_EQUAL_64(63, x11);
1605
1606  TEARDOWN();
1607}
1608
1609
1610TEST(label) {
1611  SETUP();
1612
1613  Label label_1, label_2, label_3, label_4;
1614
1615  START();
1616  __ Mov(x0, 0x1);
1617  __ Mov(x1, 0x0);
1618  __ Mov(x22, lr);    // Save lr.
1619
1620  __ B(&label_1);
1621  __ B(&label_1);
1622  __ B(&label_1);     // Multiple branches to the same label.
1623  __ Mov(x0, 0x0);
1624  __ Bind(&label_2);
1625  __ B(&label_3);     // Forward branch.
1626  __ Mov(x0, 0x0);
1627  __ Bind(&label_1);
1628  __ B(&label_2);     // Backward branch.
1629  __ Mov(x0, 0x0);
1630  __ Bind(&label_3);
1631  __ Bl(&label_4);
1632  END();
1633
1634  __ Bind(&label_4);
1635  __ Mov(x1, 0x1);
1636  __ Mov(lr, x22);
1637  END();
1638
1639  RUN();
1640
1641  ASSERT_EQUAL_64(0x1, x0);
1642  ASSERT_EQUAL_64(0x1, x1);
1643
1644  TEARDOWN();
1645}
1646
1647
1648TEST(label_2) {
1649  SETUP();
1650
1651  Label label_1, label_2, label_3;
1652  Label first_jump_to_3;
1653
1654  START();
1655  __ Mov(x0, 0x0);
1656
1657  __ B(&label_1);
1658  ptrdiff_t offset_2 = masm.CursorOffset();
1659  __ Orr(x0, x0, 1 << 1);
1660  __ B(&label_3);
1661  ptrdiff_t offset_1 = masm.CursorOffset();
1662  __ Orr(x0, x0, 1 << 0);
1663  __ B(&label_2);
1664  ptrdiff_t offset_3 = masm.CursorOffset();
1665  __ Tbz(x0, 2, &first_jump_to_3);
1666  __ Orr(x0, x0, 1 << 3);
1667  __ Bind(&first_jump_to_3);
1668  __ Orr(x0, x0, 1 << 2);
1669  __ Tbz(x0, 3, &label_3);
1670
1671  // Labels 1, 2, and 3 are bound before the current buffer offset. Branches to
1672  // label_1 and label_2 branch respectively forward and backward. Branches to
1673  // label 3 include both forward and backward branches.
1674  masm.BindToOffset(&label_1, offset_1);
1675  masm.BindToOffset(&label_2, offset_2);
1676  masm.BindToOffset(&label_3, offset_3);
1677
1678  END();
1679
1680  RUN();
1681
1682  ASSERT_EQUAL_64(0xf, x0);
1683
1684  TEARDOWN();
1685}
1686
1687
1688TEST(adr) {
1689  SETUP();
1690
1691  Label label_1, label_2, label_3, label_4;
1692
1693  START();
1694  __ Mov(x0, 0x0);        // Set to non-zero to indicate failure.
1695  __ Adr(x1, &label_3);   // Set to zero to indicate success.
1696
1697  __ Adr(x2, &label_1);   // Multiple forward references to the same label.
1698  __ Adr(x3, &label_1);
1699  __ Adr(x4, &label_1);
1700
1701  __ Bind(&label_2);
1702  __ Eor(x5, x2, Operand(x3));  // Ensure that x2,x3 and x4 are identical.
1703  __ Eor(x6, x2, Operand(x4));
1704  __ Orr(x0, x0, Operand(x5));
1705  __ Orr(x0, x0, Operand(x6));
1706  __ Br(x2);  // label_1, label_3
1707
1708  __ Bind(&label_3);
1709  __ Adr(x2, &label_3);   // Self-reference (offset 0).
1710  __ Eor(x1, x1, Operand(x2));
1711  __ Adr(x2, &label_4);   // Simple forward reference.
1712  __ Br(x2);  // label_4
1713
1714  __ Bind(&label_1);
1715  __ Adr(x2, &label_3);   // Multiple reverse references to the same label.
1716  __ Adr(x3, &label_3);
1717  __ Adr(x4, &label_3);
1718  __ Adr(x5, &label_2);   // Simple reverse reference.
1719  __ Br(x5);  // label_2
1720
1721  __ Bind(&label_4);
1722  END();
1723
1724  RUN();
1725
1726  ASSERT_EQUAL_64(0x0, x0);
1727  ASSERT_EQUAL_64(0x0, x1);
1728
1729  TEARDOWN();
1730}
1731
1732
1733// Simple adrp tests: check that labels are linked and handled properly.
1734// This is similar to the adr test, but all the adrp instructions are put on the
1735// same page so that they return the same value.
1736TEST(adrp) {
1737  Label start;
1738  Label label_1, label_2, label_3;
1739
1740  SETUP_CUSTOM(2 * kPageSize, PageOffsetDependentCode);
1741  START();
1742
1743  // Waste space until the start of a page.
1744  {
1745    InstructionAccurateScope scope(&masm,
1746                                   kPageSize / kInstructionSize,
1747                                   InstructionAccurateScope::kMaximumSize);
1748    const uintptr_t kPageOffsetMask = kPageSize - 1;
1749    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
1750      __ b(&start);
1751    }
1752    __ bind(&start);
1753  }
1754
1755  // Simple forward reference.
1756  __ Adrp(x0, &label_2);
1757
1758  __ Bind(&label_1);
1759
1760  // Multiple forward references to the same label.
1761  __ Adrp(x1, &label_3);
1762  __ Adrp(x2, &label_3);
1763  __ Adrp(x3, &label_3);
1764
1765  __ Bind(&label_2);
1766
1767  // Self-reference (offset 0).
1768  __ Adrp(x4, &label_2);
1769
1770  __ Bind(&label_3);
1771
1772  // Simple reverse reference.
1773  __ Adrp(x5, &label_1);
1774
1775  // Multiple reverse references to the same label.
1776  __ Adrp(x6, &label_2);
1777  __ Adrp(x7, &label_2);
1778  __ Adrp(x8, &label_2);
1779
1780  VIXL_ASSERT(masm.SizeOfCodeGeneratedSince(&start) < kPageSize);
1781  END();
1782  RUN();
1783
1784  uint64_t expected = reinterpret_cast<uint64_t>(
1785      AlignDown(masm.GetLabelAddress<uint64_t*>(&start), kPageSize));
1786  ASSERT_EQUAL_64(expected, x0);
1787  ASSERT_EQUAL_64(expected, x1);
1788  ASSERT_EQUAL_64(expected, x2);
1789  ASSERT_EQUAL_64(expected, x3);
1790  ASSERT_EQUAL_64(expected, x4);
1791  ASSERT_EQUAL_64(expected, x5);
1792  ASSERT_EQUAL_64(expected, x6);
1793  ASSERT_EQUAL_64(expected, x7);
1794  ASSERT_EQUAL_64(expected, x8);
1795
1796  TEARDOWN_CUSTOM();
1797}
1798
1799
1800static void AdrpPageBoundaryHelper(unsigned offset_into_page) {
1801  VIXL_ASSERT(offset_into_page < kPageSize);
1802  VIXL_ASSERT((offset_into_page % kInstructionSize) == 0);
1803
1804  const uintptr_t kPageOffsetMask = kPageSize - 1;
1805
1806  // The test label is always bound on page 0. Adrp instructions are generated
1807  // on pages from kStartPage to kEndPage (inclusive).
1808  const int kStartPage = -16;
1809  const int kEndPage = 16;
1810  const int kMaxCodeSize = (kEndPage - kStartPage + 2) * kPageSize;
1811
1812  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
1813  START();
1814
1815  Label test;
1816  Label start;
1817
1818  {
1819    InstructionAccurateScope scope(&masm,
1820                                   kMaxCodeSize / kInstructionSize,
1821                                   InstructionAccurateScope::kMaximumSize);
1822    // Initialize NZCV with `eq` flags.
1823    __ cmp(wzr, wzr);
1824    // Waste space until the start of a page.
1825    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
1826      __ b(&start);
1827    }
1828
1829    // The first page.
1830    VIXL_STATIC_ASSERT(kStartPage < 0);
1831    {
1832      InstructionAccurateScope scope_page(&masm, kPageSize / kInstructionSize);
1833      __ bind(&start);
1834      __ adrp(x0, &test);
1835      __ adrp(x1, &test);
1836      for (size_t i = 2; i < (kPageSize / kInstructionSize); i += 2) {
1837        __ ccmp(x0, x1, NoFlag, eq);
1838        __ adrp(x1, &test);
1839      }
1840    }
1841
1842    // Subsequent pages.
1843    VIXL_STATIC_ASSERT(kEndPage >= 0);
1844    for (int page = (kStartPage + 1); page <= kEndPage; page++) {
1845      InstructionAccurateScope scope_page(&masm, kPageSize / kInstructionSize);
1846      if (page == 0) {
1847        for (size_t i = 0; i < (kPageSize / kInstructionSize);) {
1848          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
1849          __ ccmp(x0, x1, NoFlag, eq);
1850          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
1851          __ adrp(x1, &test);
1852        }
1853      } else {
1854        for (size_t i = 0; i < (kPageSize / kInstructionSize); i += 2) {
1855          __ ccmp(x0, x1, NoFlag, eq);
1856          __ adrp(x1, &test);
1857        }
1858      }
1859    }
1860  }
1861
1862  // Every adrp instruction pointed to the same label (`test`), so they should
1863  // all have produced the same result.
1864
1865  END();
1866  RUN();
1867
1868  uintptr_t expected =
1869      AlignDown(masm.GetLabelAddress<uintptr_t>(&test), kPageSize);
1870  ASSERT_EQUAL_64(expected, x0);
1871  ASSERT_EQUAL_64(expected, x1);
1872  ASSERT_EQUAL_NZCV(ZCFlag);
1873
1874  TEARDOWN_CUSTOM();
1875}
1876
1877
1878// Test that labels are correctly referenced by adrp across page boundaries.
1879TEST(adrp_page_boundaries) {
1880  VIXL_STATIC_ASSERT(kPageSize == 4096);
1881  AdrpPageBoundaryHelper(kInstructionSize * 0);
1882  AdrpPageBoundaryHelper(kInstructionSize * 1);
1883  AdrpPageBoundaryHelper(kInstructionSize * 512);
1884  AdrpPageBoundaryHelper(kInstructionSize * 1022);
1885  AdrpPageBoundaryHelper(kInstructionSize * 1023);
1886}
1887
1888
1889static void AdrpOffsetHelper(int64_t offset) {
1890  const size_t kPageOffsetMask = kPageSize - 1;
1891  const int kMaxCodeSize = 2 * kPageSize;
1892
1893  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
1894  START();
1895
1896  Label page;
1897
1898  {
1899    InstructionAccurateScope scope(&masm,
1900                                   kMaxCodeSize / kInstructionSize,
1901                                   InstructionAccurateScope::kMaximumSize);
1902    // Initialize NZCV with `eq` flags.
1903    __ cmp(wzr, wzr);
1904    // Waste space until the start of a page.
1905    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
1906      __ b(&page);
1907    }
1908    __ bind(&page);
1909
1910    {
1911      int imm21 = static_cast<int>(offset);
1912      InstructionAccurateScope scope_page(&masm, kPageSize / kInstructionSize);
1913      // Every adrp instruction on this page should return the same value.
1914      __ adrp(x0, imm21);
1915      __ adrp(x1, imm21);
1916      for (size_t i = 2; i < kPageSize / kInstructionSize; i += 2) {
1917        __ ccmp(x0, x1, NoFlag, eq);
1918        __ adrp(x1, imm21);
1919      }
1920    }
1921  }
1922
1923  END();
1924  RUN();
1925
1926  uintptr_t expected =
1927      masm.GetLabelAddress<uintptr_t>(&page) + (kPageSize * offset);
1928  ASSERT_EQUAL_64(expected, x0);
1929  ASSERT_EQUAL_64(expected, x1);
1930  ASSERT_EQUAL_NZCV(ZCFlag);
1931
1932  TEARDOWN_CUSTOM();
1933}
1934
1935
1936// Check that adrp produces the correct result for a specific offset.
1937TEST(adrp_offset) {
1938  AdrpOffsetHelper(0);
1939  AdrpOffsetHelper(1);
1940  AdrpOffsetHelper(-1);
1941  AdrpOffsetHelper(4);
1942  AdrpOffsetHelper(-4);
1943  AdrpOffsetHelper(0x000fffff);
1944  AdrpOffsetHelper(-0x000fffff);
1945  AdrpOffsetHelper(-0x00100000);
1946}
1947
1948
1949TEST(branch_cond) {
1950  SETUP();
1951  ALLOW_ASM();
1952
1953  Label done, wrong;
1954
1955  START();
1956  __ Mov(x0, 0x1);
1957  __ Mov(x1, 0x1);
1958  __ Mov(x2, 0x8000000000000000);
1959
1960  // For each 'cmp' instruction below, condition codes other than the ones
1961  // following it would branch.
1962
1963  __ Cmp(x1, 0);
1964  __ B(&wrong, eq);
1965  __ B(&wrong, lo);
1966  __ B(&wrong, mi);
1967  __ B(&wrong, vs);
1968  __ B(&wrong, ls);
1969  __ B(&wrong, lt);
1970  __ B(&wrong, le);
1971  Label ok_1;
1972  __ B(&ok_1, ne);
1973  __ Mov(x0, 0x0);
1974  __ Bind(&ok_1);
1975
1976  __ Cmp(x1, 1);
1977  __ B(&wrong, ne);
1978  __ B(&wrong, lo);
1979  __ B(&wrong, mi);
1980  __ B(&wrong, vs);
1981  __ B(&wrong, hi);
1982  __ B(&wrong, lt);
1983  __ B(&wrong, gt);
1984  Label ok_2;
1985  __ B(&ok_2, pl);
1986  __ Mov(x0, 0x0);
1987  __ Bind(&ok_2);
1988
1989  __ Cmp(x1, 2);
1990  __ B(&wrong, eq);
1991  __ B(&wrong, hs);
1992  __ B(&wrong, pl);
1993  __ B(&wrong, vs);
1994  __ B(&wrong, hi);
1995  __ B(&wrong, ge);
1996  __ B(&wrong, gt);
1997  Label ok_3;
1998  __ B(&ok_3, vc);
1999  __ Mov(x0, 0x0);
2000  __ Bind(&ok_3);
2001
2002  __ Cmp(x2, 1);
2003  __ B(&wrong, eq);
2004  __ B(&wrong, lo);
2005  __ B(&wrong, mi);
2006  __ B(&wrong, vc);
2007  __ B(&wrong, ls);
2008  __ B(&wrong, ge);
2009  __ B(&wrong, gt);
2010  Label ok_4;
2011  __ B(&ok_4, le);
2012  __ Mov(x0, 0x0);
2013  __ Bind(&ok_4);
2014
2015  // The MacroAssembler does not allow al as a branch condition.
2016  Label ok_5;
2017  __ b(&ok_5, al);
2018  __ Mov(x0, 0x0);
2019  __ Bind(&ok_5);
2020
2021  // The MacroAssembler does not allow nv as a branch condition.
2022  Label ok_6;
2023  __ b(&ok_6, nv);
2024  __ Mov(x0, 0x0);
2025  __ Bind(&ok_6);
2026
2027  __ B(&done);
2028
2029  __ Bind(&wrong);
2030  __ Mov(x0, 0x0);
2031
2032  __ Bind(&done);
2033  END();
2034
2035  RUN();
2036
2037  ASSERT_EQUAL_64(0x1, x0);
2038
2039  TEARDOWN();
2040}
2041
2042
2043TEST(branch_to_reg) {
2044  SETUP();
2045
2046  // Test br.
2047  Label fn1, after_fn1;
2048
2049  START();
2050  __ Mov(x29, lr);
2051
2052  __ Mov(x1, 0);
2053  __ B(&after_fn1);
2054
2055  __ Bind(&fn1);
2056  __ Mov(x0, lr);
2057  __ Mov(x1, 42);
2058  __ Br(x0);
2059
2060  __ Bind(&after_fn1);
2061  __ Bl(&fn1);
2062
2063  // Test blr.
2064  Label fn2, after_fn2;
2065
2066  __ Mov(x2, 0);
2067  __ B(&after_fn2);
2068
2069  __ Bind(&fn2);
2070  __ Mov(x0, lr);
2071  __ Mov(x2, 84);
2072  __ Blr(x0);
2073
2074  __ Bind(&after_fn2);
2075  __ Bl(&fn2);
2076  __ Mov(x3, lr);
2077
2078  __ Mov(lr, x29);
2079  END();
2080
2081  RUN();
2082
2083  ASSERT_EQUAL_64(core.xreg(3) + kInstructionSize, x0);
2084  ASSERT_EQUAL_64(42, x1);
2085  ASSERT_EQUAL_64(84, x2);
2086
2087  TEARDOWN();
2088}
2089
2090
2091TEST(compare_branch) {
2092  SETUP();
2093
2094  START();
2095  __ Mov(x0, 0);
2096  __ Mov(x1, 0);
2097  __ Mov(x2, 0);
2098  __ Mov(x3, 0);
2099  __ Mov(x4, 0);
2100  __ Mov(x5, 0);
2101  __ Mov(x16, 0);
2102  __ Mov(x17, 42);
2103
2104  Label zt, zt_end;
2105  __ Cbz(w16, &zt);
2106  __ B(&zt_end);
2107  __ Bind(&zt);
2108  __ Mov(x0, 1);
2109  __ Bind(&zt_end);
2110
2111  Label zf, zf_end;
2112  __ Cbz(x17, &zf);
2113  __ B(&zf_end);
2114  __ Bind(&zf);
2115  __ Mov(x1, 1);
2116  __ Bind(&zf_end);
2117
2118  Label nzt, nzt_end;
2119  __ Cbnz(w17, &nzt);
2120  __ B(&nzt_end);
2121  __ Bind(&nzt);
2122  __ Mov(x2, 1);
2123  __ Bind(&nzt_end);
2124
2125  Label nzf, nzf_end;
2126  __ Cbnz(x16, &nzf);
2127  __ B(&nzf_end);
2128  __ Bind(&nzf);
2129  __ Mov(x3, 1);
2130  __ Bind(&nzf_end);
2131
2132  __ Mov(x18, 0xffffffff00000000);
2133
2134  Label a, a_end;
2135  __ Cbz(w18, &a);
2136  __ B(&a_end);
2137  __ Bind(&a);
2138  __ Mov(x4, 1);
2139  __ Bind(&a_end);
2140
2141  Label b, b_end;
2142  __ Cbnz(w18, &b);
2143  __ B(&b_end);
2144  __ Bind(&b);
2145  __ Mov(x5, 1);
2146  __ Bind(&b_end);
2147
2148  END();
2149
2150  RUN();
2151
2152  ASSERT_EQUAL_64(1, x0);
2153  ASSERT_EQUAL_64(0, x1);
2154  ASSERT_EQUAL_64(1, x2);
2155  ASSERT_EQUAL_64(0, x3);
2156  ASSERT_EQUAL_64(1, x4);
2157  ASSERT_EQUAL_64(0, x5);
2158
2159  TEARDOWN();
2160}
2161
2162
2163TEST(test_branch) {
2164  SETUP();
2165
2166  START();
2167  __ Mov(x0, 0);
2168  __ Mov(x1, 0);
2169  __ Mov(x2, 0);
2170  __ Mov(x3, 0);
2171  __ Mov(x16, 0xaaaaaaaaaaaaaaaa);
2172
2173  Label bz, bz_end;
2174  __ Tbz(w16, 0, &bz);
2175  __ B(&bz_end);
2176  __ Bind(&bz);
2177  __ Mov(x0, 1);
2178  __ Bind(&bz_end);
2179
2180  Label bo, bo_end;
2181  __ Tbz(x16, 63, &bo);
2182  __ B(&bo_end);
2183  __ Bind(&bo);
2184  __ Mov(x1, 1);
2185  __ Bind(&bo_end);
2186
2187  Label nbz, nbz_end;
2188  __ Tbnz(x16, 61, &nbz);
2189  __ B(&nbz_end);
2190  __ Bind(&nbz);
2191  __ Mov(x2, 1);
2192  __ Bind(&nbz_end);
2193
2194  Label nbo, nbo_end;
2195  __ Tbnz(w16, 2, &nbo);
2196  __ B(&nbo_end);
2197  __ Bind(&nbo);
2198  __ Mov(x3, 1);
2199  __ Bind(&nbo_end);
2200  END();
2201
2202  RUN();
2203
2204  ASSERT_EQUAL_64(1, x0);
2205  ASSERT_EQUAL_64(0, x1);
2206  ASSERT_EQUAL_64(1, x2);
2207  ASSERT_EQUAL_64(0, x3);
2208
2209  TEARDOWN();
2210}
2211
2212
2213TEST(branch_type) {
2214  SETUP();
2215
2216  Label fail, done;
2217
2218  START();
2219  __ Mov(x0, 0x0);
2220  __ Mov(x10, 0x7);
2221  __ Mov(x11, 0x0);
2222
2223  // Test non taken branches.
2224  __ Cmp(x10, 0x7);
2225  __ B(&fail, ne);
2226  __ B(&fail, never);
2227  __ B(&fail, reg_zero, x10);
2228  __ B(&fail, reg_not_zero, x11);
2229  __ B(&fail, reg_bit_clear, x10, 0);
2230  __ B(&fail, reg_bit_set, x10, 3);
2231
2232  // Test taken branches.
2233  Label l1, l2, l3, l4, l5;
2234  __ Cmp(x10, 0x7);
2235  __ B(&l1, eq);
2236  __ B(&fail);
2237  __ Bind(&l1);
2238  __ B(&l2, always);
2239  __ B(&fail);
2240  __ Bind(&l2);
2241  __ B(&l3, reg_not_zero, x10);
2242  __ B(&fail);
2243  __ Bind(&l3);
2244  __ B(&l4, reg_bit_clear, x10, 15);
2245  __ B(&fail);
2246  __ Bind(&l4);
2247  __ B(&l5, reg_bit_set, x10, 1);
2248  __ B(&fail);
2249  __ Bind(&l5);
2250
2251  __ B(&done);
2252
2253  __ Bind(&fail);
2254  __ Mov(x0, 0x1);
2255
2256  __ Bind(&done);
2257
2258  END();
2259
2260  RUN();
2261
2262  ASSERT_EQUAL_64(0x0, x0);
2263
2264  TEARDOWN();
2265}
2266
2267
2268TEST(ldr_str_offset) {
2269  SETUP();
2270
2271  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
2272  uint64_t dst[5] = {0, 0, 0, 0, 0};
2273  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2274  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2275
2276  START();
2277  __ Mov(x17, src_base);
2278  __ Mov(x18, dst_base);
2279  __ Ldr(w0, MemOperand(x17));
2280  __ Str(w0, MemOperand(x18));
2281  __ Ldr(w1, MemOperand(x17, 4));
2282  __ Str(w1, MemOperand(x18, 12));
2283  __ Ldr(x2, MemOperand(x17, 8));
2284  __ Str(x2, MemOperand(x18, 16));
2285  __ Ldrb(w3, MemOperand(x17, 1));
2286  __ Strb(w3, MemOperand(x18, 25));
2287  __ Ldrh(w4, MemOperand(x17, 2));
2288  __ Strh(w4, MemOperand(x18, 33));
2289  END();
2290
2291  RUN();
2292
2293  ASSERT_EQUAL_64(0x76543210, x0);
2294  ASSERT_EQUAL_64(0x76543210, dst[0]);
2295  ASSERT_EQUAL_64(0xfedcba98, x1);
2296  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
2297  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
2298  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
2299  ASSERT_EQUAL_64(0x32, x3);
2300  ASSERT_EQUAL_64(0x3200, dst[3]);
2301  ASSERT_EQUAL_64(0x7654, x4);
2302  ASSERT_EQUAL_64(0x765400, dst[4]);
2303  ASSERT_EQUAL_64(src_base, x17);
2304  ASSERT_EQUAL_64(dst_base, x18);
2305
2306  TEARDOWN();
2307}
2308
2309
2310TEST(ldr_str_wide) {
2311  SETUP();
2312
2313  uint32_t src[8192];
2314  uint32_t dst[8192];
2315  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2316  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2317  memset(src, 0xaa, 8192 * sizeof(src[0]));
2318  memset(dst, 0xaa, 8192 * sizeof(dst[0]));
2319  src[0] = 0;
2320  src[6144] = 6144;
2321  src[8191] = 8191;
2322
2323  START();
2324  __ Mov(x22, src_base);
2325  __ Mov(x23, dst_base);
2326  __ Mov(x24, src_base);
2327  __ Mov(x25, dst_base);
2328  __ Mov(x26, src_base);
2329  __ Mov(x27, dst_base);
2330
2331  __ Ldr(w0, MemOperand(x22, 8191 * sizeof(src[0])));
2332  __ Str(w0, MemOperand(x23, 8191 * sizeof(dst[0])));
2333  __ Ldr(w1, MemOperand(x24, 4096 * sizeof(src[0]), PostIndex));
2334  __ Str(w1, MemOperand(x25, 4096 * sizeof(dst[0]), PostIndex));
2335  __ Ldr(w2, MemOperand(x26, 6144 * sizeof(src[0]), PreIndex));
2336  __ Str(w2, MemOperand(x27, 6144 * sizeof(dst[0]), PreIndex));
2337  END();
2338
2339  RUN();
2340
2341  ASSERT_EQUAL_32(8191, w0);
2342  ASSERT_EQUAL_32(8191, dst[8191]);
2343  ASSERT_EQUAL_64(src_base, x22);
2344  ASSERT_EQUAL_64(dst_base, x23);
2345  ASSERT_EQUAL_32(0, w1);
2346  ASSERT_EQUAL_32(0, dst[0]);
2347  ASSERT_EQUAL_64(src_base + 4096 * sizeof(src[0]), x24);
2348  ASSERT_EQUAL_64(dst_base + 4096 * sizeof(dst[0]), x25);
2349  ASSERT_EQUAL_32(6144, w2);
2350  ASSERT_EQUAL_32(6144, dst[6144]);
2351  ASSERT_EQUAL_64(src_base + 6144 * sizeof(src[0]), x26);
2352  ASSERT_EQUAL_64(dst_base + 6144 * sizeof(dst[0]), x27);
2353
2354  TEARDOWN();
2355}
2356
2357
2358TEST(ldr_str_preindex) {
2359  SETUP();
2360
2361  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
2362  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
2363  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2364  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2365
2366  START();
2367  __ Mov(x17, src_base);
2368  __ Mov(x18, dst_base);
2369  __ Mov(x19, src_base);
2370  __ Mov(x20, dst_base);
2371  __ Mov(x21, src_base + 16);
2372  __ Mov(x22, dst_base + 40);
2373  __ Mov(x23, src_base);
2374  __ Mov(x24, dst_base);
2375  __ Mov(x25, src_base);
2376  __ Mov(x26, dst_base);
2377  __ Ldr(w0, MemOperand(x17, 4, PreIndex));
2378  __ Str(w0, MemOperand(x18, 12, PreIndex));
2379  __ Ldr(x1, MemOperand(x19, 8, PreIndex));
2380  __ Str(x1, MemOperand(x20, 16, PreIndex));
2381  __ Ldr(w2, MemOperand(x21, -4, PreIndex));
2382  __ Str(w2, MemOperand(x22, -4, PreIndex));
2383  __ Ldrb(w3, MemOperand(x23, 1, PreIndex));
2384  __ Strb(w3, MemOperand(x24, 25, PreIndex));
2385  __ Ldrh(w4, MemOperand(x25, 3, PreIndex));
2386  __ Strh(w4, MemOperand(x26, 41, PreIndex));
2387  END();
2388
2389  RUN();
2390
2391  ASSERT_EQUAL_64(0xfedcba98, x0);
2392  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
2393  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
2394  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
2395  ASSERT_EQUAL_64(0x01234567, x2);
2396  ASSERT_EQUAL_64(0x0123456700000000, dst[4]);
2397  ASSERT_EQUAL_64(0x32, x3);
2398  ASSERT_EQUAL_64(0x3200, dst[3]);
2399  ASSERT_EQUAL_64(0x9876, x4);
2400  ASSERT_EQUAL_64(0x987600, dst[5]);
2401  ASSERT_EQUAL_64(src_base + 4, x17);
2402  ASSERT_EQUAL_64(dst_base + 12, x18);
2403  ASSERT_EQUAL_64(src_base + 8, x19);
2404  ASSERT_EQUAL_64(dst_base + 16, x20);
2405  ASSERT_EQUAL_64(src_base + 12, x21);
2406  ASSERT_EQUAL_64(dst_base + 36, x22);
2407  ASSERT_EQUAL_64(src_base + 1, x23);
2408  ASSERT_EQUAL_64(dst_base + 25, x24);
2409  ASSERT_EQUAL_64(src_base + 3, x25);
2410  ASSERT_EQUAL_64(dst_base + 41, x26);
2411
2412  TEARDOWN();
2413}
2414
2415
2416TEST(ldr_str_postindex) {
2417  SETUP();
2418
2419  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
2420  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
2421  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2422  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2423
2424  START();
2425  __ Mov(x17, src_base + 4);
2426  __ Mov(x18, dst_base + 12);
2427  __ Mov(x19, src_base + 8);
2428  __ Mov(x20, dst_base + 16);
2429  __ Mov(x21, src_base + 8);
2430  __ Mov(x22, dst_base + 32);
2431  __ Mov(x23, src_base + 1);
2432  __ Mov(x24, dst_base + 25);
2433  __ Mov(x25, src_base + 3);
2434  __ Mov(x26, dst_base + 41);
2435  __ Ldr(w0, MemOperand(x17, 4, PostIndex));
2436  __ Str(w0, MemOperand(x18, 12, PostIndex));
2437  __ Ldr(x1, MemOperand(x19, 8, PostIndex));
2438  __ Str(x1, MemOperand(x20, 16, PostIndex));
2439  __ Ldr(x2, MemOperand(x21, -8, PostIndex));
2440  __ Str(x2, MemOperand(x22, -32, PostIndex));
2441  __ Ldrb(w3, MemOperand(x23, 1, PostIndex));
2442  __ Strb(w3, MemOperand(x24, 5, PostIndex));
2443  __ Ldrh(w4, MemOperand(x25, -3, PostIndex));
2444  __ Strh(w4, MemOperand(x26, -41, PostIndex));
2445  END();
2446
2447  RUN();
2448
2449  ASSERT_EQUAL_64(0xfedcba98, x0);
2450  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
2451  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
2452  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
2453  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
2454  ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
2455  ASSERT_EQUAL_64(0x32, x3);
2456  ASSERT_EQUAL_64(0x3200, dst[3]);
2457  ASSERT_EQUAL_64(0x9876, x4);
2458  ASSERT_EQUAL_64(0x987600, dst[5]);
2459  ASSERT_EQUAL_64(src_base + 8, x17);
2460  ASSERT_EQUAL_64(dst_base + 24, x18);
2461  ASSERT_EQUAL_64(src_base + 16, x19);
2462  ASSERT_EQUAL_64(dst_base + 32, x20);
2463  ASSERT_EQUAL_64(src_base, x21);
2464  ASSERT_EQUAL_64(dst_base, x22);
2465  ASSERT_EQUAL_64(src_base + 2, x23);
2466  ASSERT_EQUAL_64(dst_base + 30, x24);
2467  ASSERT_EQUAL_64(src_base, x25);
2468  ASSERT_EQUAL_64(dst_base, x26);
2469
2470  TEARDOWN();
2471}
2472
2473
2474TEST(ldr_str_largeindex) {
2475  SETUP();
2476
2477  // This value won't fit in the immediate offset field of ldr/str instructions.
2478  int largeoffset = 0xabcdef;
2479
2480  int64_t data[3] = { 0x1122334455667788, 0, 0 };
2481  uint64_t base_addr = reinterpret_cast<uintptr_t>(data);
2482  uint64_t drifted_addr = base_addr - largeoffset;
2483
2484  // This test checks that we we can use large immediate offsets when
2485  // using PreIndex or PostIndex addressing mode of the MacroAssembler
2486  // Ldr/Str instructions.
2487
2488  START();
2489  __ Mov(x19, drifted_addr);
2490  __ Ldr(x0, MemOperand(x19, largeoffset, PreIndex));
2491
2492  __ Mov(x20, base_addr);
2493  __ Ldr(x1, MemOperand(x20, largeoffset, PostIndex));
2494
2495  __ Mov(x21, drifted_addr);
2496  __ Str(x0, MemOperand(x21, largeoffset + 8, PreIndex));
2497
2498  __ Mov(x22, base_addr + 16);
2499  __ Str(x0, MemOperand(x22, largeoffset, PostIndex));
2500  END();
2501
2502  RUN();
2503
2504  ASSERT_EQUAL_64(0x1122334455667788, data[0]);
2505  ASSERT_EQUAL_64(0x1122334455667788, data[1]);
2506  ASSERT_EQUAL_64(0x1122334455667788, data[2]);
2507  ASSERT_EQUAL_64(0x1122334455667788, x0);
2508  ASSERT_EQUAL_64(0x1122334455667788, x1);
2509
2510  ASSERT_EQUAL_64(base_addr, x19);
2511  ASSERT_EQUAL_64(base_addr + largeoffset, x20);
2512  ASSERT_EQUAL_64(base_addr + 8, x21);
2513  ASSERT_EQUAL_64(base_addr + 16 + largeoffset, x22);
2514
2515  TEARDOWN();
2516}
2517
2518
2519TEST(load_signed) {
2520  SETUP();
2521
2522  uint32_t src[2] = {0x80008080, 0x7fff7f7f};
2523  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2524
2525  START();
2526  __ Mov(x24, src_base);
2527  __ Ldrsb(w0, MemOperand(x24));
2528  __ Ldrsb(w1, MemOperand(x24, 4));
2529  __ Ldrsh(w2, MemOperand(x24));
2530  __ Ldrsh(w3, MemOperand(x24, 4));
2531  __ Ldrsb(x4, MemOperand(x24));
2532  __ Ldrsb(x5, MemOperand(x24, 4));
2533  __ Ldrsh(x6, MemOperand(x24));
2534  __ Ldrsh(x7, MemOperand(x24, 4));
2535  __ Ldrsw(x8, MemOperand(x24));
2536  __ Ldrsw(x9, MemOperand(x24, 4));
2537  END();
2538
2539  RUN();
2540
2541  ASSERT_EQUAL_64(0xffffff80, x0);
2542  ASSERT_EQUAL_64(0x0000007f, x1);
2543  ASSERT_EQUAL_64(0xffff8080, x2);
2544  ASSERT_EQUAL_64(0x00007f7f, x3);
2545  ASSERT_EQUAL_64(0xffffffffffffff80, x4);
2546  ASSERT_EQUAL_64(0x000000000000007f, x5);
2547  ASSERT_EQUAL_64(0xffffffffffff8080, x6);
2548  ASSERT_EQUAL_64(0x0000000000007f7f, x7);
2549  ASSERT_EQUAL_64(0xffffffff80008080, x8);
2550  ASSERT_EQUAL_64(0x000000007fff7f7f, x9);
2551
2552  TEARDOWN();
2553}
2554
2555
2556TEST(load_store_regoffset) {
2557  SETUP();
2558
2559  uint32_t src[3] = {1, 2, 3};
2560  uint32_t dst[4] = {0, 0, 0, 0};
2561  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2562  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2563
2564  START();
2565  __ Mov(x16, src_base);
2566  __ Mov(x17, dst_base);
2567  __ Mov(x18, src_base + 3 * sizeof(src[0]));
2568  __ Mov(x19, dst_base + 3 * sizeof(dst[0]));
2569  __ Mov(x20, dst_base + 4 * sizeof(dst[0]));
2570  __ Mov(x24, 0);
2571  __ Mov(x25, 4);
2572  __ Mov(x26, -4);
2573  __ Mov(x27, 0xfffffffc);  // 32-bit -4.
2574  __ Mov(x28, 0xfffffffe);  // 32-bit -2.
2575  __ Mov(x29, 0xffffffff);  // 32-bit -1.
2576
2577  __ Ldr(w0, MemOperand(x16, x24));
2578  __ Ldr(x1, MemOperand(x16, x25));
2579  __ Ldr(w2, MemOperand(x18, x26));
2580  __ Ldr(w3, MemOperand(x18, x27, SXTW));
2581  __ Ldr(w4, MemOperand(x18, x28, SXTW, 2));
2582  __ Str(w0, MemOperand(x17, x24));
2583  __ Str(x1, MemOperand(x17, x25));
2584  __ Str(w2, MemOperand(x20, x29, SXTW, 2));
2585  END();
2586
2587  RUN();
2588
2589  ASSERT_EQUAL_64(1, x0);
2590  ASSERT_EQUAL_64(0x0000000300000002, x1);
2591  ASSERT_EQUAL_64(3, x2);
2592  ASSERT_EQUAL_64(3, x3);
2593  ASSERT_EQUAL_64(2, x4);
2594  ASSERT_EQUAL_32(1, dst[0]);
2595  ASSERT_EQUAL_32(2, dst[1]);
2596  ASSERT_EQUAL_32(3, dst[2]);
2597  ASSERT_EQUAL_32(3, dst[3]);
2598
2599  TEARDOWN();
2600}
2601
2602
2603TEST(load_store_float) {
2604  SETUP();
2605
2606  float src[3] = {1.0, 2.0, 3.0};
2607  float dst[3] = {0.0, 0.0, 0.0};
2608  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2609  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2610
2611  START();
2612  __ Mov(x17, src_base);
2613  __ Mov(x18, dst_base);
2614  __ Mov(x19, src_base);
2615  __ Mov(x20, dst_base);
2616  __ Mov(x21, src_base);
2617  __ Mov(x22, dst_base);
2618  __ Ldr(s0, MemOperand(x17, sizeof(src[0])));
2619  __ Str(s0, MemOperand(x18, sizeof(dst[0]), PostIndex));
2620  __ Ldr(s1, MemOperand(x19, sizeof(src[0]), PostIndex));
2621  __ Str(s1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
2622  __ Ldr(s2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
2623  __ Str(s2, MemOperand(x22, sizeof(dst[0])));
2624  END();
2625
2626  RUN();
2627
2628  ASSERT_EQUAL_FP32(2.0, s0);
2629  ASSERT_EQUAL_FP32(2.0, dst[0]);
2630  ASSERT_EQUAL_FP32(1.0, s1);
2631  ASSERT_EQUAL_FP32(1.0, dst[2]);
2632  ASSERT_EQUAL_FP32(3.0, s2);
2633  ASSERT_EQUAL_FP32(3.0, dst[1]);
2634  ASSERT_EQUAL_64(src_base, x17);
2635  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
2636  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
2637  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
2638  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
2639  ASSERT_EQUAL_64(dst_base, x22);
2640
2641  TEARDOWN();
2642}
2643
2644
2645TEST(load_store_double) {
2646  SETUP();
2647
2648  double src[3] = {1.0, 2.0, 3.0};
2649  double dst[3] = {0.0, 0.0, 0.0};
2650  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2651  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2652
2653  START();
2654  __ Mov(x17, src_base);
2655  __ Mov(x18, dst_base);
2656  __ Mov(x19, src_base);
2657  __ Mov(x20, dst_base);
2658  __ Mov(x21, src_base);
2659  __ Mov(x22, dst_base);
2660  __ Ldr(d0, MemOperand(x17, sizeof(src[0])));
2661  __ Str(d0, MemOperand(x18, sizeof(dst[0]), PostIndex));
2662  __ Ldr(d1, MemOperand(x19, sizeof(src[0]), PostIndex));
2663  __ Str(d1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
2664  __ Ldr(d2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
2665  __ Str(d2, MemOperand(x22, sizeof(dst[0])));
2666  END();
2667
2668  RUN();
2669
2670  ASSERT_EQUAL_FP64(2.0, d0);
2671  ASSERT_EQUAL_FP64(2.0, dst[0]);
2672  ASSERT_EQUAL_FP64(1.0, d1);
2673  ASSERT_EQUAL_FP64(1.0, dst[2]);
2674  ASSERT_EQUAL_FP64(3.0, d2);
2675  ASSERT_EQUAL_FP64(3.0, dst[1]);
2676  ASSERT_EQUAL_64(src_base, x17);
2677  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
2678  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
2679  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
2680  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
2681  ASSERT_EQUAL_64(dst_base, x22);
2682
2683  TEARDOWN();
2684}
2685
2686
2687TEST(load_store_b) {
2688  SETUP();
2689
2690  uint8_t src[3] = {0x12, 0x23, 0x34};
2691  uint8_t dst[3] = {0, 0, 0};
2692  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2693  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2694
2695  START();
2696  __ Mov(x17, src_base);
2697  __ Mov(x18, dst_base);
2698  __ Mov(x19, src_base);
2699  __ Mov(x20, dst_base);
2700  __ Mov(x21, src_base);
2701  __ Mov(x22, dst_base);
2702  __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
2703  __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
2704  __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
2705  __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
2706  __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
2707  __ Str(b2, MemOperand(x22, sizeof(dst[0])));
2708  END();
2709
2710  RUN();
2711
2712  ASSERT_EQUAL_128(0, 0x23, q0);
2713  ASSERT_EQUAL_64(0x23, dst[0]);
2714  ASSERT_EQUAL_128(0, 0x12, q1);
2715  ASSERT_EQUAL_64(0x12, dst[2]);
2716  ASSERT_EQUAL_128(0, 0x34, q2);
2717  ASSERT_EQUAL_64(0x34, dst[1]);
2718  ASSERT_EQUAL_64(src_base, x17);
2719  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
2720  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
2721  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
2722  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
2723  ASSERT_EQUAL_64(dst_base, x22);
2724
2725  TEARDOWN();
2726}
2727
2728
2729TEST(load_store_h) {
2730  SETUP();
2731
2732  uint16_t src[3] = {0x1234, 0x2345, 0x3456};
2733  uint16_t dst[3] = {0, 0, 0};
2734  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2735  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2736
2737  START();
2738  __ Mov(x17, src_base);
2739  __ Mov(x18, dst_base);
2740  __ Mov(x19, src_base);
2741  __ Mov(x20, dst_base);
2742  __ Mov(x21, src_base);
2743  __ Mov(x22, dst_base);
2744  __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
2745  __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
2746  __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
2747  __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
2748  __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
2749  __ Str(h2, MemOperand(x22, sizeof(dst[0])));
2750  END();
2751
2752  RUN();
2753
2754  ASSERT_EQUAL_128(0, 0x2345, q0);
2755  ASSERT_EQUAL_64(0x2345, dst[0]);
2756  ASSERT_EQUAL_128(0, 0x1234, q1);
2757  ASSERT_EQUAL_64(0x1234, dst[2]);
2758  ASSERT_EQUAL_128(0, 0x3456, q2);
2759  ASSERT_EQUAL_64(0x3456, dst[1]);
2760  ASSERT_EQUAL_64(src_base, x17);
2761  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
2762  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
2763  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
2764  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
2765  ASSERT_EQUAL_64(dst_base, x22);
2766
2767  TEARDOWN();
2768}
2769
2770
2771TEST(load_store_q) {
2772  SETUP();
2773
2774  uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
2775                     0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
2776                     0x21, 0x43, 0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f,
2777                     0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
2778                     0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
2779                     0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};
2780
2781  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
2782  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2783  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2784
2785  START();
2786  __ Mov(x17, src_base);
2787  __ Mov(x18, dst_base);
2788  __ Mov(x19, src_base);
2789  __ Mov(x20, dst_base);
2790  __ Mov(x21, src_base);
2791  __ Mov(x22, dst_base);
2792  __ Ldr(q0, MemOperand(x17, 16));
2793  __ Str(q0, MemOperand(x18, 16, PostIndex));
2794  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
2795  __ Str(q1, MemOperand(x20, 32, PreIndex));
2796  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
2797  __ Str(q2, MemOperand(x22, 16));
2798  END();
2799
2800  RUN();
2801
2802  ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
2803  ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]);
2804  ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]);
2805  ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
2806  ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]);
2807  ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]);
2808  ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
2809  ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]);
2810  ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]);
2811  ASSERT_EQUAL_64(src_base, x17);
2812  ASSERT_EQUAL_64(dst_base + 16, x18);
2813  ASSERT_EQUAL_64(src_base + 16, x19);
2814  ASSERT_EQUAL_64(dst_base + 32, x20);
2815  ASSERT_EQUAL_64(src_base + 32, x21);
2816  ASSERT_EQUAL_64(dst_base, x22);
2817
2818  TEARDOWN();
2819}
2820
2821
2822TEST(load_store_v_regoffset) {
2823  SETUP();
2824
2825  uint8_t src[64];
2826  for (unsigned i = 0; i < sizeof(src); i++) {
2827    src[i] = i;
2828  }
2829  uint8_t dst[64];
2830  memset(dst, 0, sizeof(dst));
2831
2832  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2833  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2834
2835  START();
2836  __ Mov(x17, src_base + 16);
2837  __ Mov(x18, 1);
2838  __ Mov(w19, -1);
2839  __ Mov(x20, dst_base - 1);
2840
2841  __ Ldr(b0, MemOperand(x17, x18));
2842  __ Ldr(b1, MemOperand(x17, x19, SXTW));
2843
2844  __ Ldr(h2, MemOperand(x17, x18));
2845  __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
2846  __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
2847  __ Ldr(h5, MemOperand(x17, x18, LSL, 1));
2848
2849  __ Ldr(s16, MemOperand(x17, x18));
2850  __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
2851  __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
2852  __ Ldr(s19, MemOperand(x17, x18, LSL, 2));
2853
2854  __ Ldr(d20, MemOperand(x17, x18));
2855  __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
2856  __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
2857  __ Ldr(d23, MemOperand(x17, x18, LSL, 3));
2858
2859  __ Ldr(q24, MemOperand(x17, x18));
2860  __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
2861  __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
2862  __ Ldr(q27, MemOperand(x17, x18, LSL, 4));
2863
2864  // Store [bhsdq]27 to adjacent memory locations, then load again to check.
2865  __ Str(b27, MemOperand(x20, x18));
2866  __ Str(h27, MemOperand(x20, x18, UXTW, 1));
2867  __ Add(x20, x20, 8);
2868  __ Str(s27, MemOperand(x20, x19, SXTW, 2));
2869  __ Sub(x20, x20, 8);
2870  __ Str(d27, MemOperand(x20, x18, LSL, 3));
2871  __ Add(x20, x20, 32);
2872  __ Str(q27, MemOperand(x20, x19, SXTW, 4));
2873
2874  __ Sub(x20, x20, 32);
2875  __ Ldr(q6, MemOperand(x20, x18));
2876  __ Ldr(q7, MemOperand(x20, x18, LSL, 4));
2877
2878  END();
2879
2880  RUN();
2881
2882  ASSERT_EQUAL_128(0, 0x11, q0);
2883  ASSERT_EQUAL_128(0, 0x0f, q1);
2884  ASSERT_EQUAL_128(0, 0x1211, q2);
2885  ASSERT_EQUAL_128(0, 0x1312, q3);
2886  ASSERT_EQUAL_128(0, 0x0f0e, q4);
2887  ASSERT_EQUAL_128(0, 0x1312, q5);
2888  ASSERT_EQUAL_128(0, 0x14131211, q16);
2889  ASSERT_EQUAL_128(0, 0x17161514, q17);
2890  ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
2891  ASSERT_EQUAL_128(0, 0x17161514, q19);
2892  ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
2893  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
2894  ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
2895  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
2896  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
2897  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
2898  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
2899  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
2900  ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
2901  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
2902
2903  TEARDOWN();
2904}
2905
2906
2907TEST(neon_ld1_d) {
2908  SETUP();
2909
2910  uint8_t src[32 + 5];
2911  for (unsigned i = 0; i < sizeof(src); i++) {
2912    src[i] = i;
2913  }
2914  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2915
2916  START();
2917  __ Mov(x17, src_base);
2918  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
2919  __ Ld1(v2.V8B(), MemOperand(x17));
2920  __ Add(x17, x17, 1);
2921  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
2922  __ Add(x17, x17, 1);
2923  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
2924  __ Add(x17, x17, 1);
2925  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
2926  __ Add(x17, x17, 1);
2927  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
2928  __ Add(x17, x17, 1);
2929  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
2930  END();
2931
2932  RUN();
2933
2934  ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
2935  ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
2936  ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
2937  ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
2938  ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
2939  ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
2940  ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
2941  ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
2942  ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
2943  ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
2944  ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
2945  ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
2946  ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
2947  ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
2948  ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
2949  ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
2950  ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
2951  ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
2952
2953  TEARDOWN();
2954}
2955
2956
2957TEST(neon_ld1_d_postindex) {
2958  SETUP();
2959
2960  uint8_t src[32 + 5];
2961  for (unsigned i = 0; i < sizeof(src); i++) {
2962    src[i] = i;
2963  }
2964  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2965
2966  START();
2967  __ Mov(x17, src_base);
2968  __ Mov(x18, src_base + 1);
2969  __ Mov(x19, src_base + 2);
2970  __ Mov(x20, src_base + 3);
2971  __ Mov(x21, src_base + 4);
2972  __ Mov(x22, src_base + 5);
2973  __ Mov(x23, 1);
2974  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
2975  __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
2976  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
2977  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
2978  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(),
2979         MemOperand(x20, 32, PostIndex));
2980  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
2981         MemOperand(x21, 32, PostIndex));
2982  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(),
2983         MemOperand(x22, 32, PostIndex));
2984  END();
2985
2986  RUN();
2987
2988  ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
2989  ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
2990  ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
2991  ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
2992  ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
2993  ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
2994  ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
2995  ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
2996  ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
2997  ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
2998  ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
2999  ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
3000  ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
3001  ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
3002  ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
3003  ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
3004  ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
3005  ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
3006  ASSERT_EQUAL_64(src_base + 1, x17);
3007  ASSERT_EQUAL_64(src_base + 1 + 16, x18);
3008  ASSERT_EQUAL_64(src_base + 2 + 24, x19);
3009  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
3010  ASSERT_EQUAL_64(src_base + 4 + 32, x21);
3011  ASSERT_EQUAL_64(src_base + 5 + 32, x22);
3012
3013  TEARDOWN();
3014}
3015
3016
3017TEST(neon_ld1_q) {
3018  SETUP();
3019
3020  uint8_t src[64 + 4];
3021  for (unsigned i = 0; i < sizeof(src); i++) {
3022    src[i] = i;
3023  }
3024  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3025
3026  START();
3027  __ Mov(x17, src_base);
3028  __ Ld1(v2.V16B(), MemOperand(x17));
3029  __ Add(x17, x17, 1);
3030  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
3031  __ Add(x17, x17, 1);
3032  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
3033  __ Add(x17, x17, 1);
3034  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
3035  __ Add(x17, x17, 1);
3036  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
3037  END();
3038
3039  RUN();
3040
3041  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
3042  ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
3043  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
3044  ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
3045  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
3046  ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
3047  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
3048  ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
3049  ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
3050  ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
3051  ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
3052  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
3053  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
3054  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
3055
3056  TEARDOWN();
3057}
3058
3059
3060TEST(neon_ld1_q_postindex) {
3061  SETUP();
3062
3063  uint8_t src[64 + 4];
3064  for (unsigned i = 0; i < sizeof(src); i++) {
3065    src[i] = i;
3066  }
3067  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3068
3069  START();
3070  __ Mov(x17, src_base);
3071  __ Mov(x18, src_base + 1);
3072  __ Mov(x19, src_base + 2);
3073  __ Mov(x20, src_base + 3);
3074  __ Mov(x21, src_base + 4);
3075  __ Mov(x22, 1);
3076  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
3077  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
3078  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
3079  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(),
3080         MemOperand(x20, 64, PostIndex));
3081  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
3082         MemOperand(x21, 64, PostIndex));
3083  END();
3084
3085  RUN();
3086
3087  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
3088  ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
3089  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
3090  ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
3091  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
3092  ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
3093  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
3094  ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
3095  ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
3096  ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
3097  ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
3098  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
3099  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
3100  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
3101  ASSERT_EQUAL_64(src_base + 1, x17);
3102  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
3103  ASSERT_EQUAL_64(src_base + 2 + 48, x19);
3104  ASSERT_EQUAL_64(src_base + 3 + 64, x20);
3105  ASSERT_EQUAL_64(src_base + 4 + 64, x21);
3106
3107  TEARDOWN();
3108}
3109
3110
3111TEST(neon_ld1_lane) {
3112  SETUP();
3113
3114  uint8_t src[64];
3115  for (unsigned i = 0; i < sizeof(src); i++) {
3116    src[i] = i;
3117  }
3118  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3119
3120  START();
3121
3122  // Test loading whole register by element.
3123  __ Mov(x17, src_base);
3124  for (int i = 15; i >= 0; i--) {
3125    __ Ld1(v0.B(), i, MemOperand(x17));
3126    __ Add(x17, x17, 1);
3127  }
3128
3129  __ Mov(x17, src_base);
3130  for (int i = 7; i >= 0; i--) {
3131    __ Ld1(v1.H(), i, MemOperand(x17));
3132    __ Add(x17, x17, 1);
3133  }
3134
3135  __ Mov(x17, src_base);
3136  for (int i = 3; i >= 0; i--) {
3137    __ Ld1(v2.S(), i, MemOperand(x17));
3138    __ Add(x17, x17, 1);
3139  }
3140
3141  __ Mov(x17, src_base);
3142  for (int i = 1; i >= 0; i--) {
3143    __ Ld1(v3.D(), i, MemOperand(x17));
3144    __ Add(x17, x17, 1);
3145  }
3146
3147  // Test loading a single element into an initialised register.
3148  __ Mov(x17, src_base);
3149  __ Ldr(q4, MemOperand(x17));
3150  __ Ld1(v4.B(), 4, MemOperand(x17));
3151  __ Ldr(q5, MemOperand(x17));
3152  __ Ld1(v5.H(), 3, MemOperand(x17));
3153  __ Ldr(q6, MemOperand(x17));
3154  __ Ld1(v6.S(), 2, MemOperand(x17));
3155  __ Ldr(q7, MemOperand(x17));
3156  __ Ld1(v7.D(), 1, MemOperand(x17));
3157
3158  END();
3159
3160  RUN();
3161
3162  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
3163  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
3164  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
3165  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
3166  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
3167  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
3168  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
3169  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
3170
3171  TEARDOWN();
3172}
3173
3174TEST(neon_ld2_d) {
3175  SETUP();
3176
3177  uint8_t src[64 + 4];
3178  for (unsigned i = 0; i < sizeof(src); i++) {
3179    src[i] = i;
3180  }
3181  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3182
3183  START();
3184  __ Mov(x17, src_base);
3185  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
3186  __ Add(x17, x17, 1);
3187  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
3188  __ Add(x17, x17, 1);
3189  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
3190  __ Add(x17, x17, 1);
3191  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
3192  END();
3193
3194  RUN();
3195
3196  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
3197  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
3198  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
3199  ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
3200  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
3201  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
3202  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
3203  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
3204
3205  TEARDOWN();
3206}
3207
3208TEST(neon_ld2_d_postindex) {
3209  SETUP();
3210
3211  uint8_t src[32 + 4];
3212  for (unsigned i = 0; i < sizeof(src); i++) {
3213    src[i] = i;
3214  }
3215  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3216
3217  START();
3218  __ Mov(x17, src_base);
3219  __ Mov(x18, src_base + 1);
3220  __ Mov(x19, src_base + 2);
3221  __ Mov(x20, src_base + 3);
3222  __ Mov(x21, src_base + 4);
3223  __ Mov(x22, 1);
3224  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
3225  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
3226  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
3227  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
3228  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
3229  END();
3230
3231  RUN();
3232
3233  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
3234  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
3235  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
3236  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
3237  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
3238  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
3239  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
3240  ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
3241  ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);
3242
3243  ASSERT_EQUAL_64(src_base + 1, x17);
3244  ASSERT_EQUAL_64(src_base + 1 + 16, x18);
3245  ASSERT_EQUAL_64(src_base + 2 + 16, x19);
3246  ASSERT_EQUAL_64(src_base + 3 + 16, x20);
3247  ASSERT_EQUAL_64(src_base + 4 + 16, x21);
3248
3249  TEARDOWN();
3250}
3251
3252
3253TEST(neon_ld2_q) {
3254  SETUP();
3255
3256  uint8_t src[64 + 4];
3257  for (unsigned i = 0; i < sizeof(src); i++) {
3258    src[i] = i;
3259  }
3260  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3261
3262  START();
3263  __ Mov(x17, src_base);
3264  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
3265  __ Add(x17, x17, 1);
3266  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
3267  __ Add(x17, x17, 1);
3268  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
3269  __ Add(x17, x17, 1);
3270  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
3271  __ Add(x17, x17, 1);
3272  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
3273  END();
3274
3275  RUN();
3276
3277  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
3278  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
3279  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
3280  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
3281  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
3282  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
3283  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
3284  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
3285  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
3286  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
3287
3288  TEARDOWN();
3289}
3290
3291
3292TEST(neon_ld2_q_postindex) {
3293  SETUP();
3294
3295  uint8_t src[64 + 4];
3296  for (unsigned i = 0; i < sizeof(src); i++) {
3297    src[i] = i;
3298  }
3299  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3300
3301  START();
3302  __ Mov(x17, src_base);
3303  __ Mov(x18, src_base + 1);
3304  __ Mov(x19, src_base + 2);
3305  __ Mov(x20, src_base + 3);
3306  __ Mov(x21, src_base + 4);
3307  __ Mov(x22, 1);
3308  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
3309  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
3310  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
3311  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
3312  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
3313  END();
3314
3315  RUN();
3316
3317  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
3318  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
3319  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
3320  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
3321  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
3322  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
3323  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
3324  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
3325  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
3326  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
3327
3328
3329
3330  ASSERT_EQUAL_64(src_base + 1, x17);
3331  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
3332  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
3333  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
3334  ASSERT_EQUAL_64(src_base + 4 + 32, x21);
3335
3336  TEARDOWN();
3337}
3338
3339
3340TEST(neon_ld2_lane) {
3341  SETUP();
3342
3343  uint8_t src[64];
3344  for (unsigned i = 0; i < sizeof(src); i++) {
3345    src[i] = i;
3346  }
3347  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3348
3349  START();
3350
3351  // Test loading whole register by element.
3352  __ Mov(x17, src_base);
3353  for (int i = 15; i >= 0; i--) {
3354    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
3355    __ Add(x17, x17, 1);
3356  }
3357
3358  __ Mov(x17, src_base);
3359  for (int i = 7; i >= 0; i--) {
3360    __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
3361    __ Add(x17, x17, 1);
3362  }
3363
3364  __ Mov(x17, src_base);
3365  for (int i = 3; i >= 0; i--) {
3366    __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
3367    __ Add(x17, x17, 1);
3368  }
3369
3370  __ Mov(x17, src_base);
3371  for (int i = 1; i >= 0; i--) {
3372    __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
3373    __ Add(x17, x17, 1);
3374  }
3375
3376  // Test loading a single element into an initialised register.
3377  __ Mov(x17, src_base);
3378  __ Mov(x4, x17);
3379  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
3380  __ Ldr(q9, MemOperand(x4));
3381  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
3382  __ Mov(x5, x17);
3383  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
3384  __ Ldr(q11, MemOperand(x5));
3385  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
3386  __ Mov(x6, x17);
3387  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
3388  __ Ldr(q13, MemOperand(x6));
3389  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
3390  __ Mov(x7, x17);
3391  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
3392  __ Ldr(q15, MemOperand(x7));
3393  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));
3394
3395  END();
3396
3397  RUN();
3398
3399  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
3400  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
3401  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
3402  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
3403  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
3404  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
3405  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
3406  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
3407  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
3408  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
3409  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
3410  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
3411  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
3412  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
3413  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
3414  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
3415
3416  TEARDOWN();
3417}
3418
3419
3420TEST(neon_ld2_lane_postindex) {
3421  SETUP();
3422
3423  uint8_t src[64];
3424  for (unsigned i = 0; i < sizeof(src); i++) {
3425    src[i] = i;
3426  }
3427  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3428
3429  START();
3430  __ Mov(x17, src_base);
3431  __ Mov(x18, src_base);
3432  __ Mov(x19, src_base);
3433  __ Mov(x20, src_base);
3434  __ Mov(x21, src_base);
3435  __ Mov(x22, src_base);
3436  __ Mov(x23, src_base);
3437  __ Mov(x24, src_base);
3438
3439  // Test loading whole register by element.
3440  for (int i = 15; i >= 0; i--) {
3441    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
3442  }
3443
3444  for (int i = 7; i >= 0; i--) {
3445    __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
3446  }
3447
3448  for (int i = 3; i >= 0; i--) {
3449    __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
3450  }
3451
3452  for (int i = 1; i >= 0; i--) {
3453    __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
3454  }
3455
3456  // Test loading a single element into an initialised register.
3457  __ Mov(x25, 1);
3458  __ Mov(x4, x21);
3459  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
3460  __ Ldr(q9, MemOperand(x4));
3461  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
3462  __ Add(x25, x25, 1);
3463
3464  __ Mov(x5, x22);
3465  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
3466  __ Ldr(q11, MemOperand(x5));
3467  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
3468  __ Add(x25, x25, 1);
3469
3470  __ Mov(x6, x23);
3471  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
3472  __ Ldr(q13, MemOperand(x6));
3473  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
3474  __ Add(x25, x25, 1);
3475
3476  __ Mov(x7, x24);
3477  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
3478  __ Ldr(q15, MemOperand(x7));
3479  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));
3480
3481  END();
3482
3483  RUN();
3484
3485  ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
3486  ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
3487  ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
3488  ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
3489  ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
3490  ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
3491  ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
3492  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
3493  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
3494  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
3495  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
3496  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
3497  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
3498  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
3499  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
3500  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
3501
3502
3503
3504
3505  ASSERT_EQUAL_64(src_base + 32, x17);
3506  ASSERT_EQUAL_64(src_base + 32, x18);
3507  ASSERT_EQUAL_64(src_base + 32, x19);
3508  ASSERT_EQUAL_64(src_base + 32, x20);
3509  ASSERT_EQUAL_64(src_base + 1, x21);
3510  ASSERT_EQUAL_64(src_base + 2, x22);
3511  ASSERT_EQUAL_64(src_base + 3, x23);
3512  ASSERT_EQUAL_64(src_base + 4, x24);
3513
3514  TEARDOWN();
3515}
3516
3517
3518TEST(neon_ld2_alllanes) {
3519  SETUP();
3520
3521  uint8_t src[64];
3522  for (unsigned i = 0; i < sizeof(src); i++) {
3523    src[i] = i;
3524  }
3525  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3526
3527  START();
3528  __ Mov(x17, src_base + 1);
3529  __ Mov(x18, 1);
3530  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
3531  __ Add(x17, x17, 2);
3532  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
3533  __ Add(x17, x17, 1);
3534  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
3535  __ Add(x17, x17, 1);
3536  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
3537  __ Add(x17, x17, 4);
3538  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
3539  __ Add(x17, x17, 1);
3540  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
3541  __ Add(x17, x17, 8);
3542  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
3543  END();
3544
3545  RUN();
3546
3547  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
3548  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
3549  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
3550  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
3551  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
3552  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
3553  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
3554  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
3555  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
3556  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
3557  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
3558  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
3559  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
3560  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
3561
3562  TEARDOWN();
3563}
3564
3565
3566TEST(neon_ld2_alllanes_postindex) {
3567  SETUP();
3568
3569  uint8_t src[64];
3570  for (unsigned i = 0; i < sizeof(src); i++) {
3571    src[i] = i;
3572  }
3573  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3574
3575  START();
3576  __ Mov(x17, src_base + 1);
3577  __ Mov(x18, 1);
3578  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
3579  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
3580  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
3581  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
3582  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
3583  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
3584  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
3585  END();
3586
3587  RUN();
3588
3589  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
3590  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
3591  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
3592  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
3593  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
3594  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
3595  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
3596  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
3597  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
3598  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
3599  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
3600  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
3601  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
3602  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
3603  ASSERT_EQUAL_64(src_base + 34, x17);
3604
3605  TEARDOWN();
3606}
3607
3608
3609TEST(neon_ld3_d) {
3610  SETUP();
3611
3612  uint8_t src[64 + 4];
3613  for (unsigned i = 0; i < sizeof(src); i++) {
3614    src[i] = i;
3615  }
3616  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3617
3618  START();
3619  __ Mov(x17, src_base);
3620  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
3621  __ Add(x17, x17, 1);
3622  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
3623  __ Add(x17, x17, 1);
3624  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
3625  __ Add(x17, x17, 1);
3626  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
3627  END();
3628
3629  RUN();
3630
3631  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
3632  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
3633  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
3634  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
3635  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
3636  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
3637  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
3638  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
3639  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
3640  ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
3641  ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
3642  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
3643
3644  TEARDOWN();
3645}
3646
3647
3648TEST(neon_ld3_d_postindex) {
3649  SETUP();
3650
3651  uint8_t src[32 + 4];
3652  for (unsigned i = 0; i < sizeof(src); i++) {
3653    src[i] = i;
3654  }
3655  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3656
3657  START();
3658  __ Mov(x17, src_base);
3659  __ Mov(x18, src_base + 1);
3660  __ Mov(x19, src_base + 2);
3661  __ Mov(x20, src_base + 3);
3662  __ Mov(x21, src_base + 4);
3663  __ Mov(x22, 1);
3664  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
3665  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
3666  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
3667  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
3668  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
3669  END();
3670
3671  RUN();
3672
3673  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
3674  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
3675  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
3676  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
3677  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
3678  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
3679  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
3680  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
3681  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
3682  ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
3683  ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
3684  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
3685  ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
3686  ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
3687  ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);
3688
3689  ASSERT_EQUAL_64(src_base + 1, x17);
3690  ASSERT_EQUAL_64(src_base + 1 + 24, x18);
3691  ASSERT_EQUAL_64(src_base + 2 + 24, x19);
3692  ASSERT_EQUAL_64(src_base + 3 + 24, x20);
3693  ASSERT_EQUAL_64(src_base + 4 + 24, x21);
3694
3695  TEARDOWN();
3696}
3697
3698
3699TEST(neon_ld3_q) {
3700  SETUP();
3701
3702  uint8_t src[64 + 4];
3703  for (unsigned i = 0; i < sizeof(src); i++) {
3704    src[i] = i;
3705  }
3706  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3707
3708  START();
3709  __ Mov(x17, src_base);
3710  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
3711  __ Add(x17, x17, 1);
3712  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
3713  __ Add(x17, x17, 1);
3714  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
3715  __ Add(x17, x17, 1);
3716  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
3717  __ Add(x17, x17, 1);
3718  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
3719  END();
3720
3721  RUN();
3722
3723  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
3724  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
3725  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
3726  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
3727  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
3728  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
3729  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
3730  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
3731  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
3732  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
3733  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
3734  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
3735  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
3736  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
3737  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
3738
3739  TEARDOWN();
3740}
3741
3742
3743TEST(neon_ld3_q_postindex) {
3744  SETUP();
3745
3746  uint8_t src[64 + 4];
3747  for (unsigned i = 0; i < sizeof(src); i++) {
3748    src[i] = i;
3749  }
3750  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3751
3752  START();
3753  __ Mov(x17, src_base);
3754  __ Mov(x18, src_base + 1);
3755  __ Mov(x19, src_base + 2);
3756  __ Mov(x20, src_base + 3);
3757  __ Mov(x21, src_base + 4);
3758  __ Mov(x22, 1);
3759
3760  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
3761  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
3762  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
3763  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
3764  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
3765  END();
3766
3767  RUN();
3768
3769  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
3770  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
3771  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
3772  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
3773  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
3774  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
3775  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
3776  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
3777  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
3778  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
3779  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
3780  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
3781  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
3782  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
3783  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
3784
3785  ASSERT_EQUAL_64(src_base + 1, x17);
3786  ASSERT_EQUAL_64(src_base + 1 + 48, x18);
3787  ASSERT_EQUAL_64(src_base + 2 + 48, x19);
3788  ASSERT_EQUAL_64(src_base + 3 + 48, x20);
3789  ASSERT_EQUAL_64(src_base + 4 + 48, x21);
3790
3791  TEARDOWN();
3792}
3793
3794
3795TEST(neon_ld3_lane) {
3796  SETUP();
3797
3798  uint8_t src[64];
3799  for (unsigned i = 0; i < sizeof(src); i++) {
3800    src[i] = i;
3801  }
3802  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3803
3804  START();
3805
3806  // Test loading whole register by element.
3807  __ Mov(x17, src_base);
3808  for (int i = 15; i >= 0; i--) {
3809    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
3810    __ Add(x17, x17, 1);
3811  }
3812
3813  __ Mov(x17, src_base);
3814  for (int i = 7; i >= 0; i--) {
3815    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
3816    __ Add(x17, x17, 1);
3817  }
3818
3819  __ Mov(x17, src_base);
3820  for (int i = 3; i >= 0; i--) {
3821    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
3822    __ Add(x17, x17, 1);
3823  }
3824
3825  __ Mov(x17, src_base);
3826  for (int i = 1; i >= 0; i--) {
3827    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
3828    __ Add(x17, x17, 1);
3829  }
3830
3831  // Test loading a single element into an initialised register.
3832  __ Mov(x17, src_base);
3833  __ Mov(x4, x17);
3834  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
3835  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
3836  __ Ldr(q14, MemOperand(x4));
3837  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
3838  __ Mov(x5, x17);
3839  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
3840  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
3841  __ Ldr(q17, MemOperand(x5));
3842  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
3843  __ Mov(x6, x17);
3844  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
3845  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
3846  __ Ldr(q20, MemOperand(x6));
3847  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
3848  __ Mov(x7, x17);
3849  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
3850  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
3851  __ Ldr(q23, MemOperand(x7));
3852  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));
3853
3854  END();
3855
3856  RUN();
3857
3858  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
3859  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
3860  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
3861  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
3862  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
3863  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
3864  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
3865  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
3866  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
3867  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
3868  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
3869  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
3870  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
3871  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
3872  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
3873  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
3874  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
3875  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
3876
3877  TEARDOWN();
3878}
3879
3880
3881TEST(neon_ld3_lane_postindex) {
3882  SETUP();
3883
3884  uint8_t src[64];
3885  for (unsigned i = 0; i < sizeof(src); i++) {
3886    src[i] = i;
3887  }
3888  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3889
3890  START();
3891
3892  // Test loading whole register by element.
3893  __ Mov(x17, src_base);
3894  __ Mov(x18, src_base);
3895  __ Mov(x19, src_base);
3896  __ Mov(x20, src_base);
3897  __ Mov(x21, src_base);
3898  __ Mov(x22, src_base);
3899  __ Mov(x23, src_base);
3900  __ Mov(x24, src_base);
3901  for (int i = 15; i >= 0; i--) {
3902    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
3903  }
3904
3905  for (int i = 7; i >= 0; i--) {
3906    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
3907  }
3908
3909  for (int i = 3; i >= 0; i--) {
3910    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
3911  }
3912
3913  for (int i = 1; i >= 0; i--) {
3914    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
3915  }
3916
3917
3918  // Test loading a single element into an initialised register.
3919  __ Mov(x25, 1);
3920  __ Mov(x4, x21);
3921  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
3922  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
3923  __ Ldr(q14, MemOperand(x4));
3924  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
3925  __ Add(x25, x25, 1);
3926
3927  __ Mov(x5, x22);
3928  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
3929  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
3930  __ Ldr(q17, MemOperand(x5));
3931  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
3932  __ Add(x25, x25, 1);
3933
3934  __ Mov(x6, x23);
3935  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
3936  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
3937  __ Ldr(q20, MemOperand(x6));
3938  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
3939  __ Add(x25, x25, 1);
3940
3941  __ Mov(x7, x24);
3942  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
3943  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
3944  __ Ldr(q23, MemOperand(x7));
3945  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));
3946
3947  END();
3948
3949  RUN();
3950
3951  ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
3952  ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
3953  ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
3954  ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
3955  ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
3956  ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
3957  ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
3958  ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
3959  ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
3960  ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
3961  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
3962  ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
3963  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
3964  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
3965  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
3966  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
3967  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
3968  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
3969  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
3970  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
3971  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
3972  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
3973  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
3974  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
3975
3976  ASSERT_EQUAL_64(src_base + 48, x17);
3977  ASSERT_EQUAL_64(src_base + 48, x18);
3978  ASSERT_EQUAL_64(src_base + 48, x19);
3979  ASSERT_EQUAL_64(src_base + 48, x20);
3980  ASSERT_EQUAL_64(src_base + 1, x21);
3981  ASSERT_EQUAL_64(src_base + 2, x22);
3982  ASSERT_EQUAL_64(src_base + 3, x23);
3983  ASSERT_EQUAL_64(src_base + 4, x24);
3984
3985  TEARDOWN();
3986}
3987
3988
3989TEST(neon_ld3_alllanes) {
3990  SETUP();
3991
3992  uint8_t src[64];
3993  for (unsigned i = 0; i < sizeof(src); i++) {
3994    src[i] = i;
3995  }
3996  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3997
3998  START();
3999  __ Mov(x17, src_base + 1);
4000  __ Mov(x18, 1);
4001  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
4002  __ Add(x17, x17, 3);
4003  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
4004  __ Add(x17, x17, 1);
4005  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
4006  __ Add(x17, x17, 1);
4007  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
4008  __ Add(x17, x17, 6);
4009  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
4010  __ Add(x17, x17, 1);
4011  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
4012  __ Add(x17, x17, 12);
4013  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
4014  END();
4015
4016  RUN();
4017
4018  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
4019  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
4020  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
4021  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
4022  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
4023  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
4024  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
4025  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
4026  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
4027  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
4028  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
4029  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
4030  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
4031  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
4032  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
4033  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
4034  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
4035  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
4036  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
4037  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
4038  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
4039
4040  TEARDOWN();
4041}
4042
4043
4044TEST(neon_ld3_alllanes_postindex) {
4045  SETUP();
4046
4047  uint8_t src[64];
4048  for (unsigned i = 0; i < sizeof(src); i++) {
4049    src[i] = i;
4050  }
4051  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4052  __ Mov(x17, src_base + 1);
4053  __ Mov(x18, 1);
4054
4055  START();
4056  __ Mov(x17, src_base + 1);
4057  __ Mov(x18, 1);
4058  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
4059  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
4060  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
4061  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
4062  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
4063  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
4064  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
4065  END();
4066
4067  RUN();
4068
4069  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
4070  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
4071  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
4072  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
4073  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
4074  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
4075  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
4076  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
4077  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
4078  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
4079  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
4080  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
4081  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
4082  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
4083  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
4084  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
4085  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
4086  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
4087  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
4088  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
4089  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
4090
4091  TEARDOWN();
4092}
4093
4094
4095TEST(neon_ld4_d) {
4096  SETUP();
4097
4098  uint8_t src[64 + 4];
4099  for (unsigned i = 0; i < sizeof(src); i++) {
4100    src[i] = i;
4101  }
4102  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4103
4104  START();
4105  __ Mov(x17, src_base);
4106  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
4107  __ Add(x17, x17, 1);
4108  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
4109  __ Add(x17, x17, 1);
4110  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
4111  __ Add(x17, x17, 1);
4112  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
4113  END();
4114
4115  RUN();
4116
4117  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
4118  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
4119  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
4120  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
4121  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
4122  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
4123  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
4124  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
4125  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
4126  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
4127  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
4128  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
4129  ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
4130  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
4131  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
4132  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
4133
4134  TEARDOWN();
4135}
4136
4137
4138TEST(neon_ld4_d_postindex) {
4139  SETUP();
4140
4141  uint8_t src[32 + 4];
4142  for (unsigned i = 0; i < sizeof(src); i++) {
4143    src[i] = i;
4144  }
4145  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4146
4147  START();
4148  __ Mov(x17, src_base);
4149  __ Mov(x18, src_base + 1);
4150  __ Mov(x19, src_base + 2);
4151  __ Mov(x20, src_base + 3);
4152  __ Mov(x21, src_base + 4);
4153  __ Mov(x22, 1);
4154  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(),
4155         MemOperand(x17, x22, PostIndex));
4156  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(),
4157         MemOperand(x18, 32, PostIndex));
4158  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(),
4159         MemOperand(x19, 32, PostIndex));
4160  __ Ld4(v14.V2S(), v15.V2S(), v16.V2S(), v17.V2S(),
4161         MemOperand(x20, 32, PostIndex));
4162  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
4163         MemOperand(x21, 32, PostIndex));
4164  END();
4165
4166  RUN();
4167
4168  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
4169  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
4170  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
4171  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
4172  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
4173  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
4174  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
4175  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
4176  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
4177  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
4178  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
4179  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
4180  ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
4181  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
4182  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
4183  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
4184  ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
4185  ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
4186  ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
4187  ASSERT_EQUAL_128(0, 0x2322212013121110, q1);
4188
4189
4190  ASSERT_EQUAL_64(src_base + 1, x17);
4191  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
4192  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
4193  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
4194  ASSERT_EQUAL_64(src_base + 4 + 32, x21);
4195  TEARDOWN();
4196}
4197
4198
4199TEST(neon_ld4_q) {
4200  SETUP();
4201
4202  uint8_t src[64 + 4];
4203  for (unsigned i = 0; i < sizeof(src); i++) {
4204    src[i] = i;
4205  }
4206  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4207
4208  START();
4209  __ Mov(x17, src_base);
4210  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
4211  __ Add(x17, x17, 1);
4212  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
4213  __ Add(x17, x17, 1);
4214  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
4215  __ Add(x17, x17, 1);
4216  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
4217  __ Add(x17, x17, 1);
4218  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
4219  END();
4220
4221  RUN();
4222
4223  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
4224  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
4225  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
4226  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
4227  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
4228  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
4229  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
4230  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
4231  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
4232  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
4233  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
4234  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
4235  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
4236  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
4237  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
4238  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
4239  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
4240  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
4241  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
4242  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
4243  TEARDOWN();
4244}
4245
4246
4247TEST(neon_ld4_q_postindex) {
4248  SETUP();
4249
4250  uint8_t src[64 + 4];
4251  for (unsigned i = 0; i < sizeof(src); i++) {
4252    src[i] = i;
4253  }
4254  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4255
4256  START();
4257  __ Mov(x17, src_base);
4258  __ Mov(x18, src_base + 1);
4259  __ Mov(x19, src_base + 2);
4260  __ Mov(x20, src_base + 3);
4261  __ Mov(x21, src_base + 4);
4262  __ Mov(x22, 1);
4263
4264  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(),
4265         MemOperand(x17, x22, PostIndex));
4266  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(),
4267         MemOperand(x18, 64, PostIndex));
4268  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(),
4269         MemOperand(x19, 64, PostIndex));
4270  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(),
4271         MemOperand(x20, 64, PostIndex));
4272  __ Ld4(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
4273         MemOperand(x21, 64, PostIndex));
4274  END();
4275
4276  RUN();
4277
4278  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
4279  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
4280  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
4281  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
4282  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
4283  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
4284  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
4285  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
4286  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
4287  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
4288  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
4289  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
4290  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
4291  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
4292  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
4293  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
4294  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
4295  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
4296  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
4297  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);
4298
4299
4300
4301  ASSERT_EQUAL_64(src_base + 1, x17);
4302  ASSERT_EQUAL_64(src_base + 1 + 64, x18);
4303  ASSERT_EQUAL_64(src_base + 2 + 64, x19);
4304  ASSERT_EQUAL_64(src_base + 3 + 64, x20);
4305  ASSERT_EQUAL_64(src_base + 4 + 64, x21);
4306
4307  TEARDOWN();
4308}
4309
4310
4311TEST(neon_ld4_lane) {
4312  SETUP();
4313
4314  uint8_t src[64];
4315  for (unsigned i = 0; i < sizeof(src); i++) {
4316    src[i] = i;
4317  }
4318  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4319
4320  START();
4321
4322  // Test loading whole register by element.
4323  __ Mov(x17, src_base);
4324  for (int i = 15; i >= 0; i--) {
4325    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
4326    __ Add(x17, x17, 1);
4327  }
4328
4329  __ Mov(x17, src_base);
4330  for (int i = 7; i >= 0; i--) {
4331    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
4332    __ Add(x17, x17, 1);
4333  }
4334
4335  __ Mov(x17, src_base);
4336  for (int i = 3; i >= 0; i--) {
4337    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
4338    __ Add(x17, x17, 1);
4339  }
4340
4341  __ Mov(x17, src_base);
4342  for (int i = 1; i >= 0; i--) {
4343    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
4344    __ Add(x17, x17, 1);
4345  }
4346
4347  // Test loading a single element into an initialised register.
4348  __ Mov(x17, src_base);
4349  __ Mov(x4, x17);
4350  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
4351  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
4352  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
4353  __ Ldr(q19, MemOperand(x4));
4354  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));
4355
4356  __ Mov(x5, x17);
4357  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
4358  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
4359  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
4360  __ Ldr(q23, MemOperand(x5));
4361  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));
4362
4363  __ Mov(x6, x17);
4364  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
4365  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
4366  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
4367  __ Ldr(q27, MemOperand(x6));
4368  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));
4369
4370  __ Mov(x7, x17);
4371  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
4372  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
4373  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
4374  __ Ldr(q31, MemOperand(x7));
4375  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));
4376
4377  END();
4378
4379  RUN();
4380
4381  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
4382  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
4383  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
4384  ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
4385  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
4386  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
4387  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
4388  ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
4389  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
4390  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
4391  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
4392  ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
4393  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
4394  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
4395  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
4396  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
4397  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
4398  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
4399  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
4400  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
4401  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
4402  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
4403  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
4404  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
4405  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
4406  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
4407  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
4408  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
4409  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
4410  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
4411  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
4412  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
4413
4414  TEARDOWN();
4415}
4416
4417
4418
4419TEST(neon_ld4_lane_postindex) {
4420  SETUP();
4421
4422  uint8_t src[64];
4423  for (unsigned i = 0; i < sizeof(src); i++) {
4424    src[i] = i;
4425  }
4426  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4427
4428  START();
4429
4430  // Test loading whole register by element.
4431  __ Mov(x17, src_base);
4432  for (int i = 15; i >= 0; i--) {
4433    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i,
4434           MemOperand(x17, 4, PostIndex));
4435  }
4436
4437  __ Mov(x18, src_base);
4438  for (int i = 7; i >= 0; i--) {
4439    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i,
4440           MemOperand(x18, 8, PostIndex));
4441  }
4442
4443  __ Mov(x19, src_base);
4444  for (int i = 3; i >= 0; i--) {
4445    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i,
4446           MemOperand(x19, 16, PostIndex));
4447  }
4448
4449  __ Mov(x20, src_base);
4450  for (int i = 1; i >= 0; i--) {
4451    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i,
4452           MemOperand(x20, 32, PostIndex));
4453  }
4454
4455  // Test loading a single element into an initialised register.
4456  __ Mov(x25, 1);
4457  __ Mov(x21, src_base);
4458  __ Mov(x22, src_base);
4459  __ Mov(x23, src_base);
4460  __ Mov(x24, src_base);
4461
4462  __ Mov(x4, x21);
4463  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
4464  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
4465  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
4466  __ Ldr(q19, MemOperand(x4));
4467  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4,
4468         MemOperand(x21, x25, PostIndex));
4469  __ Add(x25, x25, 1);
4470
4471  __ Mov(x5, x22);
4472  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
4473  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
4474  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
4475  __ Ldr(q23, MemOperand(x5));
4476  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3,
4477         MemOperand(x22, x25, PostIndex));
4478  __ Add(x25, x25, 1);
4479
4480  __ Mov(x6, x23);
4481  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
4482  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
4483  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
4484  __ Ldr(q27, MemOperand(x6));
4485  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2,
4486         MemOperand(x23, x25, PostIndex));
4487  __ Add(x25, x25, 1);
4488
4489  __ Mov(x7, x24);
4490  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
4491  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
4492  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
4493  __ Ldr(q31, MemOperand(x7));
4494  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1,
4495         MemOperand(x24, x25, PostIndex));
4496
4497  END();
4498
4499  RUN();
4500
4501  ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
4502  ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
4503  ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
4504  ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
4505  ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
4506  ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
4507  ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
4508  ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
4509  ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
4510  ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
4511  ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
4512  ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
4513  ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
4514  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
4515  ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
4516  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
4517  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
4518  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
4519  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
4520  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
4521  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
4522  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
4523  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
4524  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
4525  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
4526  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
4527  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
4528  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
4529  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
4530  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
4531  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
4532  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
4533
4534  ASSERT_EQUAL_64(src_base + 64, x17);
4535  ASSERT_EQUAL_64(src_base + 64, x18);
4536  ASSERT_EQUAL_64(src_base + 64, x19);
4537  ASSERT_EQUAL_64(src_base + 64, x20);
4538  ASSERT_EQUAL_64(src_base + 1, x21);
4539  ASSERT_EQUAL_64(src_base + 2, x22);
4540  ASSERT_EQUAL_64(src_base + 3, x23);
4541  ASSERT_EQUAL_64(src_base + 4, x24);
4542
4543  TEARDOWN();
4544}
4545
4546
4547TEST(neon_ld4_alllanes) {
4548  SETUP();
4549
4550  uint8_t src[64];
4551  for (unsigned i = 0; i < sizeof(src); i++) {
4552    src[i] = i;
4553  }
4554  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4555
4556  START();
4557  __ Mov(x17, src_base + 1);
4558  __ Mov(x18, 1);
4559  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
4560  __ Add(x17, x17, 4);
4561  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
4562  __ Add(x17, x17, 1);
4563  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
4564  __ Add(x17, x17, 1);
4565  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
4566  __ Add(x17, x17, 8);
4567  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
4568  __ Add(x17, x17, 1);
4569  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
4570  __ Add(x17, x17, 16);
4571  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
4572
4573
4574  END();
4575
4576  RUN();
4577
4578  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
4579  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
4580  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
4581  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
4582  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
4583  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
4584  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
4585  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
4586  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
4587  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
4588  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
4589  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
4590  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
4591  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
4592  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
4593  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
4594  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
4595  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
4596  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
4597  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
4598  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
4599  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
4600  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
4601  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
4602  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
4603  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
4604  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
4605  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
4606
4607  TEARDOWN();
4608}
4609
4610
4611TEST(neon_ld4_alllanes_postindex) {
4612  SETUP();
4613
4614  uint8_t src[64];
4615  for (unsigned i = 0; i < sizeof(src); i++) {
4616    src[i] = i;
4617  }
4618  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4619  __ Mov(x17, src_base + 1);
4620  __ Mov(x18, 1);
4621
4622  START();
4623  __ Mov(x17, src_base + 1);
4624  __ Mov(x18, 1);
4625  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(),
4626          MemOperand(x17, 4, PostIndex));
4627  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(),
4628          MemOperand(x17, x18, PostIndex));
4629  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(),
4630          MemOperand(x17, x18, PostIndex));
4631  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(),
4632          MemOperand(x17, 8, PostIndex));
4633  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(),
4634          MemOperand(x17, x18, PostIndex));
4635  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(),
4636          MemOperand(x17, 16, PostIndex));
4637  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(),
4638          MemOperand(x17, 32, PostIndex));
4639  END();
4640
4641  RUN();
4642
4643  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
4644  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
4645  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
4646  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
4647  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
4648  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
4649  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
4650  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
4651  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
4652  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
4653  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
4654  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
4655  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
4656  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
4657  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
4658  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
4659  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
4660  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
4661  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
4662  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
4663  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
4664  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
4665  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
4666  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
4667  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
4668  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
4669  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
4670  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
4671  ASSERT_EQUAL_64(src_base + 64, x17);
4672
4673  TEARDOWN();
4674}
4675
4676
4677TEST(neon_st1_lane) {
4678  SETUP();
4679
4680  uint8_t src[64];
4681  for (unsigned i = 0; i < sizeof(src); i++) {
4682    src[i] = i;
4683  }
4684  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
4685
4686  START();
4687  __ Mov(x17, src_base);
4688  __ Mov(x18, -16);
4689  __ Ldr(q0, MemOperand(x17));
4690
4691  for (int i = 15; i >= 0; i--) {
4692    __ St1(v0.B(), i, MemOperand(x17));
4693    __ Add(x17, x17, 1);
4694  }
4695  __ Ldr(q1, MemOperand(x17, x18));
4696
4697  for (int i = 7; i >= 0; i--) {
4698    __ St1(v0.H(), i, MemOperand(x17));
4699    __ Add(x17, x17, 2);
4700  }
4701  __ Ldr(q2, MemOperand(x17, x18));
4702
4703  for (int i = 3; i >= 0; i--) {
4704    __ St1(v0.S(), i, MemOperand(x17));
4705    __ Add(x17, x17, 4);
4706  }
4707  __ Ldr(q3, MemOperand(x17, x18));
4708
4709  for (int i = 1; i >= 0; i--) {
4710    __ St1(v0.D(), i, MemOperand(x17));
4711    __ Add(x17, x17, 8);
4712  }
4713  __ Ldr(q4, MemOperand(x17, x18));
4714
4715  END();
4716
4717  RUN();
4718
4719  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
4720  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
4721  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
4722  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
4723
4724  TEARDOWN();
4725}
4726
4727
4728TEST(neon_st2_lane) {
4729  SETUP();
4730
4731  // Struct size * addressing modes * element sizes * vector size.
4732  uint8_t dst[2 * 2 * 4 * 16];
4733  memset(dst, 0, sizeof(dst));
4734  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
4735
4736  START();
4737  __ Mov(x17, dst_base);
4738  __ Mov(x18, dst_base);
4739  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
4740  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
4741
4742  // Test B stores with and without post index.
4743  for (int i = 15; i >= 0; i--) {
4744    __ St2(v0.B(), v1.B(), i, MemOperand(x18));
4745    __ Add(x18, x18, 2);
4746  }
4747  for (int i = 15; i >= 0; i--) {
4748    __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
4749  }
4750  __ Ldr(q2, MemOperand(x17, 0 * 16));
4751  __ Ldr(q3, MemOperand(x17, 1 * 16));
4752  __ Ldr(q4, MemOperand(x17, 2 * 16));
4753  __ Ldr(q5, MemOperand(x17, 3 * 16));
4754
4755  // Test H stores with and without post index.
4756  __ Mov(x0, 4);
4757  for (int i = 7; i >= 0; i--) {
4758    __ St2(v0.H(), v1.H(), i, MemOperand(x18));
4759    __ Add(x18, x18, 4);
4760  }
4761  for (int i = 7; i >= 0; i--) {
4762    __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
4763  }
4764  __ Ldr(q6, MemOperand(x17, 4 * 16));
4765  __ Ldr(q7, MemOperand(x17, 5 * 16));
4766  __ Ldr(q16, MemOperand(x17, 6 * 16));
4767  __ Ldr(q17, MemOperand(x17, 7 * 16));
4768
4769  // Test S stores with and without post index.
4770  for (int i = 3; i >= 0; i--) {
4771    __ St2(v0.S(), v1.S(), i, MemOperand(x18));
4772    __ Add(x18, x18, 8);
4773  }
4774  for (int i = 3; i >= 0; i--) {
4775    __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
4776  }
4777  __ Ldr(q18, MemOperand(x17, 8 * 16));
4778  __ Ldr(q19, MemOperand(x17, 9 * 16));
4779  __ Ldr(q20, MemOperand(x17, 10 * 16));
4780  __ Ldr(q21, MemOperand(x17, 11 * 16));
4781
4782  // Test D stores with and without post index.
4783  __ Mov(x0, 16);
4784  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
4785  __ Add(x18, x18, 16);
4786  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
4787  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
4788  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
4789  __ Ldr(q22, MemOperand(x17, 12 * 16));
4790  __ Ldr(q23, MemOperand(x17, 13 * 16));
4791  __ Ldr(q24, MemOperand(x17, 14 * 16));
4792  __ Ldr(q25, MemOperand(x17, 15 * 16));
4793  END();
4794
4795  RUN();
4796
4797  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
4798  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
4799  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
4800  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);
4801
4802  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
4803  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
4804  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
4805  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);
4806
4807  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
4808  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
4809  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
4810  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);
4811
4812  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
4813  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
4814  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
4815  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
4816
4817  TEARDOWN();
4818}
4819
4820
4821TEST(neon_st3_lane) {
4822  SETUP();
4823
4824  // Struct size * addressing modes * element sizes * vector size.
4825  uint8_t dst[3 * 2 * 4 * 16];
4826  memset(dst, 0, sizeof(dst));
4827  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
4828
4829  START();
4830  __ Mov(x17, dst_base);
4831  __ Mov(x18, dst_base);
4832  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
4833  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
4834  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
4835
4836  // Test B stores with and without post index.
4837  for (int i = 15; i >= 0; i--) {
4838    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
4839    __ Add(x18, x18, 3);
4840  }
4841  for (int i = 15; i >= 0; i--) {
4842    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
4843  }
4844  __ Ldr(q3, MemOperand(x17, 0 * 16));
4845  __ Ldr(q4, MemOperand(x17, 1 * 16));
4846  __ Ldr(q5, MemOperand(x17, 2 * 16));
4847  __ Ldr(q6, MemOperand(x17, 3 * 16));
4848  __ Ldr(q7, MemOperand(x17, 4 * 16));
4849  __ Ldr(q16, MemOperand(x17, 5 * 16));
4850
4851  // Test H stores with and without post index.
4852  __ Mov(x0, 6);
4853  for (int i = 7; i >= 0; i--) {
4854    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
4855    __ Add(x18, x18, 6);
4856  }
4857  for (int i = 7; i >= 0; i--) {
4858    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
4859  }
4860  __ Ldr(q17, MemOperand(x17, 6 * 16));
4861  __ Ldr(q18, MemOperand(x17, 7 * 16));
4862  __ Ldr(q19, MemOperand(x17, 8 * 16));
4863  __ Ldr(q20, MemOperand(x17, 9 * 16));
4864  __ Ldr(q21, MemOperand(x17, 10 * 16));
4865  __ Ldr(q22, MemOperand(x17, 11 * 16));
4866
4867  // Test S stores with and without post index.
4868  for (int i = 3; i >= 0; i--) {
4869    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
4870    __ Add(x18, x18, 12);
4871  }
4872  for (int i = 3; i >= 0; i--) {
4873    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
4874  }
4875  __ Ldr(q23, MemOperand(x17, 12 * 16));
4876  __ Ldr(q24, MemOperand(x17, 13 * 16));
4877  __ Ldr(q25, MemOperand(x17, 14 * 16));
4878  __ Ldr(q26, MemOperand(x17, 15 * 16));
4879  __ Ldr(q27, MemOperand(x17, 16 * 16));
4880  __ Ldr(q28, MemOperand(x17, 17 * 16));
4881
4882  // Test D stores with and without post index.
4883  __ Mov(x0, 24);
4884  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
4885  __ Add(x18, x18, 24);
4886  __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
4887  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
4888  __ Ldr(q29, MemOperand(x17, 18 * 16));
4889  __ Ldr(q30, MemOperand(x17, 19 * 16));
4890  __ Ldr(q31, MemOperand(x17, 20 * 16));
4891  END();
4892
4893  RUN();
4894
4895  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
4896  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
4897  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
4898  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
4899  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
4900  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);
4901
4902  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
4903  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
4904  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
4905  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
4906  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
4907  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);
4908
4909  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
4910  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
4911  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
4912  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
4913  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
4914  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);
4915
4916  TEARDOWN();
4917}
4918
4919
4920TEST(neon_st4_lane) {
4921  SETUP();
4922
4923  // Struct size * element sizes * vector size.
4924  uint8_t dst[4 * 4 * 16];
4925  memset(dst, 0, sizeof(dst));
4926  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
4927
4928  START();
4929  __ Mov(x17, dst_base);
4930  __ Mov(x18, dst_base);
4931  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
4932  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
4933  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
4934  __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
4935
4936  // Test B stores without post index.
4937  for (int i = 15; i >= 0; i--) {
4938    __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
4939    __ Add(x18, x18, 4);
4940  }
4941  __ Ldr(q4, MemOperand(x17, 0 * 16));
4942  __ Ldr(q5, MemOperand(x17, 1 * 16));
4943  __ Ldr(q6, MemOperand(x17, 2 * 16));
4944  __ Ldr(q7, MemOperand(x17, 3 * 16));
4945
4946  // Test H stores with post index.
4947  __ Mov(x0, 8);
4948  for (int i = 7; i >= 0; i--) {
4949    __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
4950  }
4951  __ Ldr(q16, MemOperand(x17, 4 * 16));
4952  __ Ldr(q17, MemOperand(x17, 5 * 16));
4953  __ Ldr(q18, MemOperand(x17, 6 * 16));
4954  __ Ldr(q19, MemOperand(x17, 7 * 16));
4955
4956  // Test S stores without post index.
4957  for (int i = 3; i >= 0; i--) {
4958    __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
4959    __ Add(x18, x18, 16);
4960  }
4961  __ Ldr(q20, MemOperand(x17, 8 * 16));
4962  __ Ldr(q21, MemOperand(x17, 9 * 16));
4963  __ Ldr(q22, MemOperand(x17, 10 * 16));
4964  __ Ldr(q23, MemOperand(x17, 11 * 16));
4965
4966  // Test D stores with post index.
4967  __ Mov(x0, 32);
4968  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
4969  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));
4970
4971  __ Ldr(q24, MemOperand(x17, 12 * 16));
4972  __ Ldr(q25, MemOperand(x17, 13 * 16));
4973  __ Ldr(q26, MemOperand(x17, 14 * 16));
4974  __ Ldr(q27, MemOperand(x17, 15 * 16));
4975  END();
4976
4977  RUN();
4978
4979  ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
4980  ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
4981  ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
4982  ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);
4983
4984  ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
4985  ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
4986  ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
4987  ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);
4988
4989  ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
4990  ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
4991  ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
4992  ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);
4993
4994  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
4995  ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
4996  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
4997  ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);
4998
4999  TEARDOWN();
5000}
5001
5002
5003TEST(neon_ld1_lane_postindex) {
5004  SETUP();
5005
5006  uint8_t src[64];
5007  for (unsigned i = 0; i < sizeof(src); i++) {
5008    src[i] = i;
5009  }
5010  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5011
5012  START();
5013  __ Mov(x17, src_base);
5014  __ Mov(x18, src_base);
5015  __ Mov(x19, src_base);
5016  __ Mov(x20, src_base);
5017  __ Mov(x21, src_base);
5018  __ Mov(x22, src_base);
5019  __ Mov(x23, src_base);
5020  __ Mov(x24, src_base);
5021
5022  // Test loading whole register by element.
5023  for (int i = 15; i >= 0; i--) {
5024    __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
5025  }
5026
5027  for (int i = 7; i >= 0; i--) {
5028    __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
5029  }
5030
5031  for (int i = 3; i >= 0; i--) {
5032    __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
5033  }
5034
5035  for (int i = 1; i >= 0; i--) {
5036    __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
5037  }
5038
5039  // Test loading a single element into an initialised register.
5040  __ Mov(x25, 1);
5041  __ Ldr(q4, MemOperand(x21));
5042  __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
5043  __ Add(x25, x25, 1);
5044
5045  __ Ldr(q5, MemOperand(x22));
5046  __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
5047  __ Add(x25, x25, 1);
5048
5049  __ Ldr(q6, MemOperand(x23));
5050  __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
5051  __ Add(x25, x25, 1);
5052
5053  __ Ldr(q7, MemOperand(x24));
5054  __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));
5055
5056  END();
5057
5058  RUN();
5059
5060  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
5061  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
5062  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
5063  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
5064  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
5065  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
5066  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
5067  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
5068  ASSERT_EQUAL_64(src_base + 16, x17);
5069  ASSERT_EQUAL_64(src_base + 16, x18);
5070  ASSERT_EQUAL_64(src_base + 16, x19);
5071  ASSERT_EQUAL_64(src_base + 16, x20);
5072  ASSERT_EQUAL_64(src_base + 1, x21);
5073  ASSERT_EQUAL_64(src_base + 2, x22);
5074  ASSERT_EQUAL_64(src_base + 3, x23);
5075  ASSERT_EQUAL_64(src_base + 4, x24);
5076
5077  TEARDOWN();
5078}
5079
5080
5081TEST(neon_st1_lane_postindex) {
5082  SETUP();
5083
5084  uint8_t src[64];
5085  for (unsigned i = 0; i < sizeof(src); i++) {
5086    src[i] = i;
5087  }
5088  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5089
5090  START();
5091  __ Mov(x17, src_base);
5092  __ Mov(x18, -16);
5093  __ Ldr(q0, MemOperand(x17));
5094
5095  for (int i = 15; i >= 0; i--) {
5096    __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex));
5097  }
5098  __ Ldr(q1, MemOperand(x17, x18));
5099
5100  for (int i = 7; i >= 0; i--) {
5101    __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex));
5102  }
5103  __ Ldr(q2, MemOperand(x17, x18));
5104
5105  for (int i = 3; i >= 0; i--) {
5106    __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex));
5107  }
5108  __ Ldr(q3, MemOperand(x17, x18));
5109
5110  for (int i = 1; i >= 0; i--) {
5111    __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex));
5112  }
5113  __ Ldr(q4, MemOperand(x17, x18));
5114
5115  END();
5116
5117  RUN();
5118
5119  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
5120  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
5121  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
5122  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
5123
5124  TEARDOWN();
5125}
5126
5127
5128TEST(neon_ld1_alllanes) {
5129  SETUP();
5130
5131  uint8_t src[64];
5132  for (unsigned i = 0; i < sizeof(src); i++) {
5133    src[i] = i;
5134  }
5135  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5136
5137  START();
5138  __ Mov(x17, src_base + 1);
5139  __ Ld1r(v0.V8B(), MemOperand(x17));
5140  __ Add(x17, x17, 1);
5141  __ Ld1r(v1.V16B(), MemOperand(x17));
5142  __ Add(x17, x17, 1);
5143  __ Ld1r(v2.V4H(), MemOperand(x17));
5144  __ Add(x17, x17, 1);
5145  __ Ld1r(v3.V8H(), MemOperand(x17));
5146  __ Add(x17, x17, 1);
5147  __ Ld1r(v4.V2S(), MemOperand(x17));
5148  __ Add(x17, x17, 1);
5149  __ Ld1r(v5.V4S(), MemOperand(x17));
5150  __ Add(x17, x17, 1);
5151  __ Ld1r(v6.V1D(), MemOperand(x17));
5152  __ Add(x17, x17, 1);
5153  __ Ld1r(v7.V2D(), MemOperand(x17));
5154  END();
5155
5156  RUN();
5157
5158  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
5159  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
5160  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
5161  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
5162  ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
5163  ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
5164  ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
5165  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
5166
5167  TEARDOWN();
5168}
5169
5170
5171TEST(neon_ld1_alllanes_postindex) {
5172  SETUP();
5173
5174  uint8_t src[64];
5175  for (unsigned i = 0; i < sizeof(src); i++) {
5176    src[i] = i;
5177  }
5178  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5179
5180  START();
5181  __ Mov(x17, src_base + 1);
5182  __ Mov(x18, 1);
5183  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
5184  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
5185  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
5186  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
5187  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
5188  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
5189  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
5190  END();
5191
5192  RUN();
5193
5194  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
5195  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
5196  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
5197  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
5198  ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
5199  ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
5200  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
5201  ASSERT_EQUAL_64(src_base + 19, x17);
5202
5203  TEARDOWN();
5204}
5205
5206
5207TEST(neon_st1_d) {
5208  SETUP();
5209
5210  uint8_t src[14 * kDRegSizeInBytes];
5211  for (unsigned i = 0; i < sizeof(src); i++) {
5212    src[i] = i;
5213  }
5214  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5215
5216  START();
5217  __ Mov(x17, src_base);
5218  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5219  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5220  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5221  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5222  __ Mov(x17, src_base);
5223
5224  __ St1(v0.V8B(), MemOperand(x17));
5225  __ Ldr(d16, MemOperand(x17, 8, PostIndex));
5226
5227  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
5228  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
5229
5230  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
5231  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
5232  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
5233  __ Ldr(d20, MemOperand(x17, 8, PostIndex));
5234
5235  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
5236  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
5237  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
5238
5239  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
5240  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
5241  __ Ldr(q24, MemOperand(x17));
5242  END();
5243
5244  RUN();
5245
5246  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
5247  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
5248  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
5249  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
5250  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
5251  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
5252  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
5253  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
5254  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
5255  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
5256  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
5257  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
5258  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
5259
5260  TEARDOWN();
5261}
5262
5263
5264TEST(neon_st1_d_postindex) {
5265  SETUP();
5266
5267  uint8_t src[64 + 14 * kDRegSizeInBytes];
5268  for (unsigned i = 0; i < sizeof(src); i++) {
5269    src[i] = i;
5270  }
5271  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5272
5273  START();
5274  __ Mov(x17, src_base);
5275  __ Mov(x18, -8);
5276  __ Mov(x19, -16);
5277  __ Mov(x20, -24);
5278  __ Mov(x21, -32);
5279  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5280  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5281  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5282  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5283  __ Mov(x17, src_base);
5284
5285  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
5286  __ Ldr(d16, MemOperand(x17, x18));
5287
5288  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
5289  __ Ldr(q17, MemOperand(x17, x19));
5290
5291  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
5292  __ Ldr(d18, MemOperand(x17, x20));
5293  __ Ldr(d19, MemOperand(x17, x19));
5294  __ Ldr(d20, MemOperand(x17, x18));
5295
5296  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(),
5297         MemOperand(x17, 32, PostIndex));
5298  __ Ldr(q21, MemOperand(x17, x21));
5299  __ Ldr(q22, MemOperand(x17, x19));
5300
5301  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(),
5302         MemOperand(x17, 32, PostIndex));
5303  __ Ldr(q23, MemOperand(x17, x21));
5304  __ Ldr(q24, MemOperand(x17, x19));
5305  END();
5306
5307  RUN();
5308
5309  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
5310  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
5311  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
5312  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
5313  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
5314  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
5315  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
5316  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
5317  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
5318
5319  TEARDOWN();
5320}
5321
5322
5323TEST(neon_st1_q) {
5324  SETUP();
5325
5326  uint8_t src[64 + 160];
5327  for (unsigned i = 0; i < sizeof(src); i++) {
5328    src[i] = i;
5329  }
5330  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5331
5332  START();
5333  __ Mov(x17, src_base);
5334  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5335  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5336  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5337  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5338
5339  __ St1(v0.V16B(), MemOperand(x17));
5340  __ Ldr(q16, MemOperand(x17, 16, PostIndex));
5341
5342  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
5343  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
5344  __ Ldr(q18, MemOperand(x17, 16, PostIndex));
5345
5346  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
5347  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
5348  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
5349  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
5350
5351  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
5352  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
5353  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
5354  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
5355  __ Ldr(q25, MemOperand(x17));
5356  END();
5357
5358  RUN();
5359
5360  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
5361  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
5362  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
5363  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
5364  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
5365  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
5366  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
5367  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
5368  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
5369  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
5370
5371  TEARDOWN();
5372}
5373
5374
5375TEST(neon_st1_q_postindex) {
5376  SETUP();
5377
5378  uint8_t src[64 + 160];
5379  for (unsigned i = 0; i < sizeof(src); i++) {
5380    src[i] = i;
5381  }
5382  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5383
5384  START();
5385  __ Mov(x17, src_base);
5386  __ Mov(x18, -16);
5387  __ Mov(x19, -32);
5388  __ Mov(x20, -48);
5389  __ Mov(x21, -64);
5390  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5391  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5392  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5393  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5394
5395  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
5396  __ Ldr(q16, MemOperand(x17, x18));
5397
5398  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
5399  __ Ldr(q17, MemOperand(x17, x19));
5400  __ Ldr(q18, MemOperand(x17, x18));
5401
5402  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
5403  __ Ldr(q19, MemOperand(x17, x20));
5404  __ Ldr(q20, MemOperand(x17, x19));
5405  __ Ldr(q21, MemOperand(x17, x18));
5406
5407  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(),
5408         MemOperand(x17, 64, PostIndex));
5409  __ Ldr(q22, MemOperand(x17, x21));
5410  __ Ldr(q23, MemOperand(x17, x20));
5411  __ Ldr(q24, MemOperand(x17, x19));
5412  __ Ldr(q25, MemOperand(x17, x18));
5413
5414  END();
5415
5416  RUN();
5417
5418  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
5419  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
5420  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
5421  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
5422  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
5423  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
5424  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
5425  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
5426  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
5427  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
5428
5429  TEARDOWN();
5430}
5431
5432
5433TEST(neon_st2_d) {
5434  SETUP();
5435
5436  uint8_t src[4*16];
5437  for (unsigned i = 0; i < sizeof(src); i++) {
5438    src[i] = i;
5439  }
5440  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5441
5442  START();
5443  __ Mov(x17, src_base);
5444  __ Mov(x18, src_base);
5445  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5446  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5447
5448  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
5449  __ Add(x18, x18, 22);
5450  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
5451  __ Add(x18, x18, 11);
5452  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));
5453
5454  __ Mov(x19, src_base);
5455  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5456  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5457  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5458  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5459
5460  END();
5461
5462  RUN();
5463
5464  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
5465  ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
5466  ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
5467  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
5468
5469  TEARDOWN();
5470}
5471
5472
5473TEST(neon_st2_d_postindex) {
5474  SETUP();
5475
5476  uint8_t src[4*16];
5477  for (unsigned i = 0; i < sizeof(src); i++) {
5478    src[i] = i;
5479  }
5480  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5481
5482  START();
5483  __ Mov(x22, 5);
5484  __ Mov(x17, src_base);
5485  __ Mov(x18, src_base);
5486  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5487  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5488
5489  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
5490  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
5491  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));
5492
5493
5494  __ Mov(x19, src_base);
5495  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5496  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5497  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5498
5499  END();
5500
5501  RUN();
5502
5503  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
5504  ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
5505  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
5506
5507  TEARDOWN();
5508}
5509
5510
5511TEST(neon_st2_q) {
5512  SETUP();
5513
5514  uint8_t src[5*16];
5515  for (unsigned i = 0; i < sizeof(src); i++) {
5516    src[i] = i;
5517  }
5518  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5519
5520  START();
5521  __ Mov(x17, src_base);
5522  __ Mov(x18, src_base);
5523  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5524  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5525
5526  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
5527  __ Add(x18, x18, 8);
5528  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
5529  __ Add(x18, x18, 22);
5530  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
5531  __ Add(x18, x18, 2);
5532  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));
5533
5534  __ Mov(x19, src_base);
5535  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5536  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5537  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5538  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5539
5540  END();
5541
5542  RUN();
5543
5544  ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
5545  ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
5546  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
5547  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
5548  TEARDOWN();
5549}
5550
5551
5552TEST(neon_st2_q_postindex) {
5553  SETUP();
5554
5555  uint8_t src[5*16];
5556  for (unsigned i = 0; i < sizeof(src); i++) {
5557    src[i] = i;
5558  }
5559  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5560
5561  START();
5562  __ Mov(x22, 5);
5563  __ Mov(x17, src_base);
5564  __ Mov(x18, src_base);
5565  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5566  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5567
5568  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
5569  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
5570  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
5571  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));
5572
5573  __ Mov(x19, src_base);
5574  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5575  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5576  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5577  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5578  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
5579
5580  END();
5581
5582  RUN();
5583
5584  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
5585  ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
5586  ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
5587  ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
5588  ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
5589
5590  TEARDOWN();
5591}
5592
5593
5594TEST(neon_st3_d) {
5595  SETUP();
5596
5597  uint8_t src[3*16];
5598  for (unsigned i = 0; i < sizeof(src); i++) {
5599    src[i] = i;
5600  }
5601  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5602
5603  START();
5604  __ Mov(x17, src_base);
5605  __ Mov(x18, src_base);
5606  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5607  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5608  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5609
5610  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
5611  __ Add(x18, x18, 3);
5612  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
5613  __ Add(x18, x18, 2);
5614  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));
5615
5616
5617  __ Mov(x19, src_base);
5618  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5619  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5620
5621  END();
5622
5623  RUN();
5624
5625  ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
5626  ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
5627
5628  TEARDOWN();
5629}
5630
5631
5632TEST(neon_st3_d_postindex) {
5633  SETUP();
5634
5635  uint8_t src[4*16];
5636  for (unsigned i = 0; i < sizeof(src); i++) {
5637    src[i] = i;
5638  }
5639  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5640
5641  START();
5642  __ Mov(x22, 5);
5643  __ Mov(x17, src_base);
5644  __ Mov(x18, src_base);
5645  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5646  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5647  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5648
5649  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
5650  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
5651  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));
5652
5653
5654  __ Mov(x19, src_base);
5655  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5656  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5657  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5658  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5659
5660  END();
5661
5662  RUN();
5663
5664  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
5665  ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
5666  ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
5667  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
5668
5669  TEARDOWN();
5670}
5671
5672
5673TEST(neon_st3_q) {
5674  SETUP();
5675
5676  uint8_t src[6*16];
5677  for (unsigned i = 0; i < sizeof(src); i++) {
5678    src[i] = i;
5679  }
5680  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5681
5682  START();
5683  __ Mov(x17, src_base);
5684  __ Mov(x18, src_base);
5685  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5686  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5687  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5688
5689  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
5690  __ Add(x18, x18, 5);
5691  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
5692  __ Add(x18, x18, 12);
5693  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
5694  __ Add(x18, x18, 22);
5695  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
5696
5697  __ Mov(x19, src_base);
5698  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5699  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5700  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5701  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5702  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
5703  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
5704
5705  END();
5706
5707  RUN();
5708
5709  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
5710  ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
5711  ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
5712  ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
5713  ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
5714  ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
5715
5716  TEARDOWN();
5717}
5718
5719
5720TEST(neon_st3_q_postindex) {
5721  SETUP();
5722
5723  uint8_t src[7*16];
5724  for (unsigned i = 0; i < sizeof(src); i++) {
5725    src[i] = i;
5726  }
5727  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5728
5729  START();
5730  __ Mov(x22, 5);
5731  __ Mov(x17, src_base);
5732  __ Mov(x18, src_base);
5733  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5734  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5735  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5736
5737  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
5738  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
5739  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
5740  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
5741
5742  __ Mov(x19, src_base);
5743  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5744  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5745  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5746  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5747  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
5748  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
5749  __ Ldr(q6, MemOperand(x19, 16, PostIndex));
5750
5751  END();
5752
5753  RUN();
5754
5755  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
5756  ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
5757  ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
5758  ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
5759  ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
5760  ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
5761  ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
5762
5763  TEARDOWN();
5764}
5765
5766
5767TEST(neon_st4_d) {
5768  SETUP();
5769
5770  uint8_t src[4*16];
5771  for (unsigned i = 0; i < sizeof(src); i++) {
5772    src[i] = i;
5773  }
5774  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5775
5776  START();
5777  __ Mov(x17, src_base);
5778  __ Mov(x18, src_base);
5779  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5780  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5781  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5782  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5783
5784  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
5785  __ Add(x18, x18, 12);
5786  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
5787  __ Add(x18, x18, 15);
5788  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));
5789
5790
5791  __ Mov(x19, src_base);
5792  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5793  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5794  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5795  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5796
5797  END();
5798
5799  RUN();
5800
5801  ASSERT_EQUAL_128(0x1110010032221202, 0X3121110130201000, q0);
5802  ASSERT_EQUAL_128(0x1003020100322322, 0X1312030231302120, q1);
5803  ASSERT_EQUAL_128(0x1407060504333231, 0X3023222120131211, q2);
5804  ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
5805
5806  TEARDOWN();
5807}
5808
5809
5810TEST(neon_st4_d_postindex) {
5811  SETUP();
5812
5813  uint8_t src[5*16];
5814  for (unsigned i = 0; i < sizeof(src); i++) {
5815    src[i] = i;
5816  }
5817  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5818
5819  START();
5820  __ Mov(x22, 5);
5821  __ Mov(x17, src_base);
5822  __ Mov(x18, src_base);
5823  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5824  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5825  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5826  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5827
5828  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(),
5829         MemOperand(x18, x22, PostIndex));
5830  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(),
5831         MemOperand(x18, 32, PostIndex));
5832  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(),
5833         MemOperand(x18));
5834
5835
5836  __ Mov(x19, src_base);
5837  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5838  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5839  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5840  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5841  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
5842
5843  END();
5844
5845  RUN();
5846
5847  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
5848  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
5849  ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
5850  ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
5851  ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
5852
5853  TEARDOWN();
5854}
5855
5856
5857TEST(neon_st4_q) {
5858  SETUP();
5859
5860  uint8_t src[7*16];
5861  for (unsigned i = 0; i < sizeof(src); i++) {
5862    src[i] = i;
5863  }
5864  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5865
5866  START();
5867  __ Mov(x17, src_base);
5868  __ Mov(x18, src_base);
5869  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5870  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5871  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5872  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5873
5874  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
5875  __ Add(x18, x18, 5);
5876  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
5877  __ Add(x18, x18, 12);
5878  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
5879  __ Add(x18, x18, 22);
5880  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
5881  __ Add(x18, x18, 10);
5882
5883  __ Mov(x19, src_base);
5884  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5885  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5886  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5887  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5888  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
5889  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
5890  __ Ldr(q6, MemOperand(x19, 16, PostIndex));
5891
5892  END();
5893
5894  RUN();
5895
5896  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
5897  ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
5898  ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
5899  ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
5900  ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
5901  ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
5902  ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
5903
5904  TEARDOWN();
5905}
5906
5907
5908TEST(neon_st4_q_postindex) {
5909  SETUP();
5910
5911  uint8_t src[9*16];
5912  for (unsigned i = 0; i < sizeof(src); i++) {
5913    src[i] = i;
5914  }
5915  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5916
5917  START();
5918  __ Mov(x22, 5);
5919  __ Mov(x17, src_base);
5920  __ Mov(x18, src_base);
5921  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
5922  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
5923  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
5924  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
5925
5926  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(),
5927         MemOperand(x18, x22, PostIndex));
5928  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(),
5929         MemOperand(x18, 64, PostIndex));
5930  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(),
5931         MemOperand(x18, x22, PostIndex));
5932  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(),
5933         MemOperand(x18));
5934
5935  __ Mov(x19, src_base);
5936  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
5937  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
5938  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
5939  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
5940  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
5941  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
5942  __ Ldr(q6, MemOperand(x19, 16, PostIndex));
5943  __ Ldr(q7, MemOperand(x19, 16, PostIndex));
5944  __ Ldr(q8, MemOperand(x19, 16, PostIndex));
5945
5946  END();
5947
5948  RUN();
5949
5950  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
5951  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
5952  ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
5953  ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
5954  ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
5955  ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
5956  ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
5957  ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
5958  ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
5959
5960  TEARDOWN();
5961}
5962
5963
5964TEST(ldp_stp_float) {
5965  SETUP();
5966
5967  float src[2] = {1.0, 2.0};
5968  float dst[3] = {0.0, 0.0, 0.0};
5969  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5970  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
5971
5972  START();
5973  __ Mov(x16, src_base);
5974  __ Mov(x17, dst_base);
5975  __ Ldp(s31, s0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
5976  __ Stp(s0, s31, MemOperand(x17, sizeof(dst[1]), PreIndex));
5977  END();
5978
5979  RUN();
5980
5981  ASSERT_EQUAL_FP32(1.0, s31);
5982  ASSERT_EQUAL_FP32(2.0, s0);
5983  ASSERT_EQUAL_FP32(0.0, dst[0]);
5984  ASSERT_EQUAL_FP32(2.0, dst[1]);
5985  ASSERT_EQUAL_FP32(1.0, dst[2]);
5986  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
5987  ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);
5988
5989  TEARDOWN();
5990}
5991
5992
5993TEST(ldp_stp_double) {
5994  SETUP();
5995
5996  double src[2] = {1.0, 2.0};
5997  double dst[3] = {0.0, 0.0, 0.0};
5998  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5999  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6000
6001  START();
6002  __ Mov(x16, src_base);
6003  __ Mov(x17, dst_base);
6004  __ Ldp(d31, d0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
6005  __ Stp(d0, d31, MemOperand(x17, sizeof(dst[1]), PreIndex));
6006  END();
6007
6008  RUN();
6009
6010  ASSERT_EQUAL_FP64(1.0, d31);
6011  ASSERT_EQUAL_FP64(2.0, d0);
6012  ASSERT_EQUAL_FP64(0.0, dst[0]);
6013  ASSERT_EQUAL_FP64(2.0, dst[1]);
6014  ASSERT_EQUAL_FP64(1.0, dst[2]);
6015  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
6016  ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);
6017
6018  TEARDOWN();
6019}
6020
6021
6022TEST(ldp_stp_quad) {
6023  SETUP();
6024
6025  uint64_t src[4] = {0x0123456789abcdef, 0xaaaaaaaa55555555,
6026                     0xfedcba9876543210, 0x55555555aaaaaaaa};
6027  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
6028  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6029  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6030
6031  START();
6032  __ Mov(x16, src_base);
6033  __ Mov(x17, dst_base);
6034  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
6035  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
6036  END();
6037
6038  RUN();
6039
6040  ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
6041  ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
6042  ASSERT_EQUAL_64(0, dst[0]);
6043  ASSERT_EQUAL_64(0, dst[1]);
6044  ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]);
6045  ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]);
6046  ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
6047  ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]);
6048  ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16);
6049  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17);
6050
6051  TEARDOWN();
6052}
6053
6054
6055TEST(ldp_stp_offset) {
6056  SETUP();
6057
6058  uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
6059                     0xffeeddccbbaa9988};
6060  uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
6061  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6062  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6063
6064  START();
6065  __ Mov(x16, src_base);
6066  __ Mov(x17, dst_base);
6067  __ Mov(x18, src_base + 24);
6068  __ Mov(x19, dst_base + 56);
6069  __ Ldp(w0, w1, MemOperand(x16));
6070  __ Ldp(w2, w3, MemOperand(x16, 4));
6071  __ Ldp(x4, x5, MemOperand(x16, 8));
6072  __ Ldp(w6, w7, MemOperand(x18, -12));
6073  __ Ldp(x8, x9, MemOperand(x18, -16));
6074  __ Stp(w0, w1, MemOperand(x17));
6075  __ Stp(w2, w3, MemOperand(x17, 8));
6076  __ Stp(x4, x5, MemOperand(x17, 16));
6077  __ Stp(w6, w7, MemOperand(x19, -24));
6078  __ Stp(x8, x9, MemOperand(x19, -16));
6079  END();
6080
6081  RUN();
6082
6083  ASSERT_EQUAL_64(0x44556677, x0);
6084  ASSERT_EQUAL_64(0x00112233, x1);
6085  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
6086  ASSERT_EQUAL_64(0x00112233, x2);
6087  ASSERT_EQUAL_64(0xccddeeff, x3);
6088  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
6089  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
6090  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
6091  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
6092  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
6093  ASSERT_EQUAL_64(0x8899aabb, x6);
6094  ASSERT_EQUAL_64(0xbbaa9988, x7);
6095  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
6096  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
6097  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
6098  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
6099  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
6100  ASSERT_EQUAL_64(src_base, x16);
6101  ASSERT_EQUAL_64(dst_base, x17);
6102  ASSERT_EQUAL_64(src_base + 24, x18);
6103  ASSERT_EQUAL_64(dst_base + 56, x19);
6104
6105  TEARDOWN();
6106}
6107
6108
6109TEST(ldp_stp_offset_wide) {
6110  SETUP();
6111
6112  uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
6113                     0xffeeddccbbaa9988};
6114  uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
6115  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6116  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6117  // Move base too far from the array to force multiple instructions
6118  // to be emitted.
6119  const int64_t base_offset = 1024;
6120
6121  START();
6122  __ Mov(x20, src_base - base_offset);
6123  __ Mov(x21, dst_base - base_offset);
6124  __ Mov(x18, src_base + base_offset + 24);
6125  __ Mov(x19, dst_base + base_offset + 56);
6126  __ Ldp(w0, w1, MemOperand(x20, base_offset));
6127  __ Ldp(w2, w3, MemOperand(x20, base_offset + 4));
6128  __ Ldp(x4, x5, MemOperand(x20, base_offset + 8));
6129  __ Ldp(w6, w7, MemOperand(x18, -12 - base_offset));
6130  __ Ldp(x8, x9, MemOperand(x18, -16 - base_offset));
6131  __ Stp(w0, w1, MemOperand(x21, base_offset));
6132  __ Stp(w2, w3, MemOperand(x21, base_offset + 8));
6133  __ Stp(x4, x5, MemOperand(x21, base_offset + 16));
6134  __ Stp(w6, w7, MemOperand(x19, -24 - base_offset));
6135  __ Stp(x8, x9, MemOperand(x19, -16 - base_offset));
6136  END();
6137
6138  RUN();
6139
6140  ASSERT_EQUAL_64(0x44556677, x0);
6141  ASSERT_EQUAL_64(0x00112233, x1);
6142  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
6143  ASSERT_EQUAL_64(0x00112233, x2);
6144  ASSERT_EQUAL_64(0xccddeeff, x3);
6145  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
6146  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
6147  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
6148  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
6149  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
6150  ASSERT_EQUAL_64(0x8899aabb, x6);
6151  ASSERT_EQUAL_64(0xbbaa9988, x7);
6152  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
6153  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
6154  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
6155  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
6156  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
6157  ASSERT_EQUAL_64(src_base - base_offset, x20);
6158  ASSERT_EQUAL_64(dst_base - base_offset, x21);
6159  ASSERT_EQUAL_64(src_base + base_offset + 24, x18);
6160  ASSERT_EQUAL_64(dst_base + base_offset + 56, x19);
6161
6162  TEARDOWN();
6163}
6164
6165
6166TEST(ldnp_stnp_offset) {
6167  SETUP();
6168
6169  uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff,
6170                     0xffeeddccbbaa9988, 0x7766554433221100};
6171  uint64_t dst[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
6172  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6173  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6174
6175  START();
6176  __ Mov(x16, src_base);
6177  __ Mov(x17, dst_base);
6178  __ Mov(x18, src_base + 24);
6179  __ Mov(x19, dst_base + 64);
6180  __ Mov(x20, src_base + 32);
6181
6182  // Ensure address set up has happened before executing non-temporal ops.
6183  __ Dmb(InnerShareable, BarrierAll);
6184
6185  __ Ldnp(w0, w1, MemOperand(x16));
6186  __ Ldnp(w2, w3, MemOperand(x16, 4));
6187  __ Ldnp(x4, x5, MemOperand(x16, 8));
6188  __ Ldnp(w6, w7, MemOperand(x18, -12));
6189  __ Ldnp(x8, x9, MemOperand(x18, -16));
6190  __ Ldnp(q16, q17, MemOperand(x16));
6191  __ Ldnp(q19, q18, MemOperand(x20, -32));
6192  __ Stnp(w0, w1, MemOperand(x17));
6193  __ Stnp(w2, w3, MemOperand(x17, 8));
6194  __ Stnp(x4, x5, MemOperand(x17, 16));
6195  __ Stnp(w6, w7, MemOperand(x19, -32));
6196  __ Stnp(x8, x9, MemOperand(x19, -24));
6197  __ Stnp(q17, q16, MemOperand(x19));
6198  __ Stnp(q18, q19, MemOperand(x19, 32));
6199  END();
6200
6201  RUN();
6202
6203  ASSERT_EQUAL_64(0x44556677, x0);
6204  ASSERT_EQUAL_64(0x00112233, x1);
6205  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
6206  ASSERT_EQUAL_64(0x00112233, x2);
6207  ASSERT_EQUAL_64(0xccddeeff, x3);
6208  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
6209  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
6210  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
6211  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
6212  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
6213  ASSERT_EQUAL_64(0x8899aabb, x6);
6214  ASSERT_EQUAL_64(0xbbaa9988, x7);
6215  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
6216  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
6217  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
6218  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
6219  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
6220  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q16);
6221  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q17);
6222  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q18);
6223  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q19);
6224  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[8]);
6225  ASSERT_EQUAL_64(0x7766554433221100, dst[9]);
6226  ASSERT_EQUAL_64(0x0011223344556677, dst[10]);
6227  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[11]);
6228  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[12]);
6229  ASSERT_EQUAL_64(0x7766554433221100, dst[13]);
6230  ASSERT_EQUAL_64(0x0011223344556677, dst[14]);
6231  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[15]);
6232  ASSERT_EQUAL_64(src_base, x16);
6233  ASSERT_EQUAL_64(dst_base, x17);
6234  ASSERT_EQUAL_64(src_base + 24, x18);
6235  ASSERT_EQUAL_64(dst_base + 64, x19);
6236  ASSERT_EQUAL_64(src_base + 32, x20);
6237
6238  TEARDOWN();
6239}
6240
6241
6242TEST(ldnp_stnp_offset_float) {
6243  SETUP();
6244
6245  float src[3] = {1.2, 2.3, 3.4};
6246  float dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
6247  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6248  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6249
6250  START();
6251  __ Mov(x16, src_base);
6252  __ Mov(x17, dst_base);
6253  __ Mov(x18, src_base + 12);
6254  __ Mov(x19, dst_base + 24);
6255
6256  // Ensure address set up has happened before executing non-temporal ops.
6257  __ Dmb(InnerShareable, BarrierAll);
6258
6259  __ Ldnp(s0, s1, MemOperand(x16));
6260  __ Ldnp(s2, s3, MemOperand(x16, 4));
6261  __ Ldnp(s5, s4, MemOperand(x18, -8));
6262  __ Stnp(s1, s0, MemOperand(x17));
6263  __ Stnp(s3, s2, MemOperand(x17, 8));
6264  __ Stnp(s4, s5, MemOperand(x19, -8));
6265  END();
6266
6267  RUN();
6268
6269  ASSERT_EQUAL_FP32(1.2, s0);
6270  ASSERT_EQUAL_FP32(2.3, s1);
6271  ASSERT_EQUAL_FP32(2.3, dst[0]);
6272  ASSERT_EQUAL_FP32(1.2, dst[1]);
6273  ASSERT_EQUAL_FP32(2.3, s2);
6274  ASSERT_EQUAL_FP32(3.4, s3);
6275  ASSERT_EQUAL_FP32(3.4, dst[2]);
6276  ASSERT_EQUAL_FP32(2.3, dst[3]);
6277  ASSERT_EQUAL_FP32(3.4, s4);
6278  ASSERT_EQUAL_FP32(2.3, s5);
6279  ASSERT_EQUAL_FP32(3.4, dst[4]);
6280  ASSERT_EQUAL_FP32(2.3, dst[5]);
6281  ASSERT_EQUAL_64(src_base, x16);
6282  ASSERT_EQUAL_64(dst_base, x17);
6283  ASSERT_EQUAL_64(src_base + 12, x18);
6284  ASSERT_EQUAL_64(dst_base + 24, x19);
6285
6286  TEARDOWN();
6287}
6288
6289
6290TEST(ldnp_stnp_offset_double) {
6291  SETUP();
6292
6293  double src[3] = {1.2, 2.3, 3.4};
6294  double dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
6295  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6296  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6297
6298  START();
6299  __ Mov(x16, src_base);
6300  __ Mov(x17, dst_base);
6301  __ Mov(x18, src_base + 24);
6302  __ Mov(x19, dst_base + 48);
6303
6304  // Ensure address set up has happened before executing non-temporal ops.
6305  __ Dmb(InnerShareable, BarrierAll);
6306
6307  __ Ldnp(d0, d1, MemOperand(x16));
6308  __ Ldnp(d2, d3, MemOperand(x16, 8));
6309  __ Ldnp(d5, d4, MemOperand(x18, -16));
6310  __ Stnp(d1, d0, MemOperand(x17));
6311  __ Stnp(d3, d2, MemOperand(x17, 16));
6312  __ Stnp(d4, d5, MemOperand(x19, -16));
6313  END();
6314
6315  RUN();
6316
6317  ASSERT_EQUAL_FP64(1.2, d0);
6318  ASSERT_EQUAL_FP64(2.3, d1);
6319  ASSERT_EQUAL_FP64(2.3, dst[0]);
6320  ASSERT_EQUAL_FP64(1.2, dst[1]);
6321  ASSERT_EQUAL_FP64(2.3, d2);
6322  ASSERT_EQUAL_FP64(3.4, d3);
6323  ASSERT_EQUAL_FP64(3.4, dst[2]);
6324  ASSERT_EQUAL_FP64(2.3, dst[3]);
6325  ASSERT_EQUAL_FP64(3.4, d4);
6326  ASSERT_EQUAL_FP64(2.3, d5);
6327  ASSERT_EQUAL_FP64(3.4, dst[4]);
6328  ASSERT_EQUAL_FP64(2.3, dst[5]);
6329  ASSERT_EQUAL_64(src_base, x16);
6330  ASSERT_EQUAL_64(dst_base, x17);
6331  ASSERT_EQUAL_64(src_base + 24, x18);
6332  ASSERT_EQUAL_64(dst_base + 48, x19);
6333
6334  TEARDOWN();
6335}
6336
6337
6338TEST(ldp_stp_preindex) {
6339  SETUP();
6340
6341  uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
6342                     0xffeeddccbbaa9988};
6343  uint64_t dst[5] = {0, 0, 0, 0, 0};
6344  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6345  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6346
6347  START();
6348  __ Mov(x16, src_base);
6349  __ Mov(x17, dst_base);
6350  __ Mov(x18, dst_base + 16);
6351  __ Ldp(w0, w1, MemOperand(x16, 4, PreIndex));
6352  __ Mov(x19, x16);
6353  __ Ldp(w2, w3, MemOperand(x16, -4, PreIndex));
6354  __ Stp(w2, w3, MemOperand(x17, 4, PreIndex));
6355  __ Mov(x20, x17);
6356  __ Stp(w0, w1, MemOperand(x17, -4, PreIndex));
6357  __ Ldp(x4, x5, MemOperand(x16, 8, PreIndex));
6358  __ Mov(x21, x16);
6359  __ Ldp(x6, x7, MemOperand(x16, -8, PreIndex));
6360  __ Stp(x7, x6, MemOperand(x18, 8, PreIndex));
6361  __ Mov(x22, x18);
6362  __ Stp(x5, x4, MemOperand(x18, -8, PreIndex));
6363  END();
6364
6365  RUN();
6366
6367  ASSERT_EQUAL_64(0x00112233, x0);
6368  ASSERT_EQUAL_64(0xccddeeff, x1);
6369  ASSERT_EQUAL_64(0x44556677, x2);
6370  ASSERT_EQUAL_64(0x00112233, x3);
6371  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
6372  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
6373  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
6374  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
6375  ASSERT_EQUAL_64(0x0011223344556677, x6);
6376  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
6377  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
6378  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
6379  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
6380  ASSERT_EQUAL_64(src_base, x16);
6381  ASSERT_EQUAL_64(dst_base, x17);
6382  ASSERT_EQUAL_64(dst_base + 16, x18);
6383  ASSERT_EQUAL_64(src_base + 4, x19);
6384  ASSERT_EQUAL_64(dst_base + 4, x20);
6385  ASSERT_EQUAL_64(src_base + 8, x21);
6386  ASSERT_EQUAL_64(dst_base + 24, x22);
6387
6388  TEARDOWN();
6389}
6390
6391
6392TEST(ldp_stp_preindex_wide) {
6393  SETUP();
6394
6395  uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
6396                     0xffeeddccbbaa9988};
6397  uint64_t dst[5] = {0, 0, 0, 0, 0};
6398  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6399  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6400  // Move base too far from the array to force multiple instructions
6401  // to be emitted.
6402  const int64_t base_offset = 1024;
6403
6404  START();
6405  __ Mov(x24, src_base - base_offset);
6406  __ Mov(x25, dst_base + base_offset);
6407  __ Mov(x18, dst_base + base_offset + 16);
6408  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex));
6409  __ Mov(x19, x24);
6410  __ Mov(x24, src_base - base_offset + 4);
6411  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex));
6412  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset , PreIndex));
6413  __ Mov(x20, x25);
6414  __ Mov(x25, dst_base + base_offset + 4);
6415  __ Mov(x24, src_base - base_offset);
6416  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex));
6417  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex));
6418  __ Mov(x21, x24);
6419  __ Mov(x24, src_base - base_offset + 8);
6420  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex));
6421  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex));
6422  __ Mov(x22, x18);
6423  __ Mov(x18, dst_base + base_offset + 16 + 8);
6424  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex));
6425  END();
6426
6427  RUN();
6428
6429  ASSERT_EQUAL_64(0x00112233, x0);
6430  ASSERT_EQUAL_64(0xccddeeff, x1);
6431  ASSERT_EQUAL_64(0x44556677, x2);
6432  ASSERT_EQUAL_64(0x00112233, x3);
6433  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
6434  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
6435  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
6436  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
6437  ASSERT_EQUAL_64(0x0011223344556677, x6);
6438  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
6439  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
6440  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
6441  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
6442  ASSERT_EQUAL_64(src_base, x24);
6443  ASSERT_EQUAL_64(dst_base, x25);
6444  ASSERT_EQUAL_64(dst_base + 16, x18);
6445  ASSERT_EQUAL_64(src_base + 4, x19);
6446  ASSERT_EQUAL_64(dst_base + 4, x20);
6447  ASSERT_EQUAL_64(src_base + 8, x21);
6448  ASSERT_EQUAL_64(dst_base + 24, x22);
6449
6450  TEARDOWN();
6451}
6452
6453
6454TEST(ldp_stp_postindex) {
6455  SETUP();
6456
6457  uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff,
6458                     0xffeeddccbbaa9988, 0x7766554433221100};
6459  uint64_t dst[5] = {0, 0, 0, 0, 0};
6460  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6461  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6462
6463  START();
6464  __ Mov(x16, src_base);
6465  __ Mov(x17, dst_base);
6466  __ Mov(x18, dst_base + 16);
6467  __ Ldp(w0, w1, MemOperand(x16, 4, PostIndex));
6468  __ Mov(x19, x16);
6469  __ Ldp(w2, w3, MemOperand(x16, -4, PostIndex));
6470  __ Stp(w2, w3, MemOperand(x17, 4, PostIndex));
6471  __ Mov(x20, x17);
6472  __ Stp(w0, w1, MemOperand(x17, -4, PostIndex));
6473  __ Ldp(x4, x5, MemOperand(x16, 8, PostIndex));
6474  __ Mov(x21, x16);
6475  __ Ldp(x6, x7, MemOperand(x16, -8, PostIndex));
6476  __ Stp(x7, x6, MemOperand(x18, 8, PostIndex));
6477  __ Mov(x22, x18);
6478  __ Stp(x5, x4, MemOperand(x18, -8, PostIndex));
6479  END();
6480
6481  RUN();
6482
6483  ASSERT_EQUAL_64(0x44556677, x0);
6484  ASSERT_EQUAL_64(0x00112233, x1);
6485  ASSERT_EQUAL_64(0x00112233, x2);
6486  ASSERT_EQUAL_64(0xccddeeff, x3);
6487  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
6488  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
6489  ASSERT_EQUAL_64(0x0011223344556677, x4);
6490  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
6491  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
6492  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
6493  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
6494  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
6495  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
6496  ASSERT_EQUAL_64(src_base, x16);
6497  ASSERT_EQUAL_64(dst_base, x17);
6498  ASSERT_EQUAL_64(dst_base + 16, x18);
6499  ASSERT_EQUAL_64(src_base + 4, x19);
6500  ASSERT_EQUAL_64(dst_base + 4, x20);
6501  ASSERT_EQUAL_64(src_base + 8, x21);
6502  ASSERT_EQUAL_64(dst_base + 24, x22);
6503
6504  TEARDOWN();
6505}
6506
6507
6508TEST(ldp_stp_postindex_wide) {
6509  SETUP();
6510
6511  uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff,
6512                     0xffeeddccbbaa9988, 0x7766554433221100};
6513  uint64_t dst[5] = {0, 0, 0, 0, 0};
6514  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6515  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6516  // Move base too far from the array to force multiple instructions
6517  // to be emitted.
6518  const int64_t base_offset = 1024;
6519
6520  START();
6521  __ Mov(x24, src_base);
6522  __ Mov(x25, dst_base);
6523  __ Mov(x18, dst_base + 16);
6524  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex));
6525  __ Mov(x19, x24);
6526  __ Sub(x24, x24, base_offset);
6527  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex));
6528  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex));
6529  __ Mov(x20, x25);
6530  __ Sub(x24, x24, base_offset);
6531  __ Add(x25, x25, base_offset);
6532  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex));
6533  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex));
6534  __ Mov(x21, x24);
6535  __ Sub(x24, x24, base_offset);
6536  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex));
6537  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex));
6538  __ Mov(x22, x18);
6539  __ Add(x18, x18, base_offset);
6540  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex));
6541  END();
6542
6543  RUN();
6544
6545  ASSERT_EQUAL_64(0x44556677, x0);
6546  ASSERT_EQUAL_64(0x00112233, x1);
6547  ASSERT_EQUAL_64(0x00112233, x2);
6548  ASSERT_EQUAL_64(0xccddeeff, x3);
6549  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
6550  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
6551  ASSERT_EQUAL_64(0x0011223344556677, x4);
6552  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
6553  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
6554  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
6555  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
6556  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
6557  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
6558  ASSERT_EQUAL_64(src_base + base_offset, x24);
6559  ASSERT_EQUAL_64(dst_base - base_offset, x25);
6560  ASSERT_EQUAL_64(dst_base - base_offset + 16, x18);
6561  ASSERT_EQUAL_64(src_base + base_offset + 4, x19);
6562  ASSERT_EQUAL_64(dst_base - base_offset + 4, x20);
6563  ASSERT_EQUAL_64(src_base + base_offset + 8, x21);
6564  ASSERT_EQUAL_64(dst_base - base_offset + 24, x22);
6565
6566  TEARDOWN();
6567}
6568
6569
6570TEST(ldp_sign_extend) {
6571  SETUP();
6572
6573  uint32_t src[2] = {0x80000000, 0x7fffffff};
6574  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6575
6576  START();
6577  __ Mov(x24, src_base);
6578  __ Ldpsw(x0, x1, MemOperand(x24));
6579  END();
6580
6581  RUN();
6582
6583  ASSERT_EQUAL_64(0xffffffff80000000, x0);
6584  ASSERT_EQUAL_64(0x000000007fffffff, x1);
6585
6586  TEARDOWN();
6587}
6588
6589
6590TEST(ldur_stur) {
6591  SETUP();
6592
6593  int64_t src[2] = {0x0123456789abcdef, 0x0123456789abcdef};
6594  int64_t dst[5] = {0, 0, 0, 0, 0};
6595  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6596  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6597
6598  START();
6599  __ Mov(x17, src_base);
6600  __ Mov(x18, dst_base);
6601  __ Mov(x19, src_base + 16);
6602  __ Mov(x20, dst_base + 32);
6603  __ Mov(x21, dst_base + 40);
6604  __ Ldr(w0, MemOperand(x17, 1));
6605  __ Str(w0, MemOperand(x18, 2));
6606  __ Ldr(x1, MemOperand(x17, 3));
6607  __ Str(x1, MemOperand(x18, 9));
6608  __ Ldr(w2, MemOperand(x19, -9));
6609  __ Str(w2, MemOperand(x20, -5));
6610  __ Ldrb(w3, MemOperand(x19, -1));
6611  __ Strb(w3, MemOperand(x21, -1));
6612  END();
6613
6614  RUN();
6615
6616  ASSERT_EQUAL_64(0x6789abcd, x0);
6617  ASSERT_EQUAL_64(0x00006789abcd0000, dst[0]);
6618  ASSERT_EQUAL_64(0xabcdef0123456789, x1);
6619  ASSERT_EQUAL_64(0xcdef012345678900, dst[1]);
6620  ASSERT_EQUAL_64(0x000000ab, dst[2]);
6621  ASSERT_EQUAL_64(0xabcdef01, x2);
6622  ASSERT_EQUAL_64(0x00abcdef01000000, dst[3]);
6623  ASSERT_EQUAL_64(0x00000001, x3);
6624  ASSERT_EQUAL_64(0x0100000000000000, dst[4]);
6625  ASSERT_EQUAL_64(src_base, x17);
6626  ASSERT_EQUAL_64(dst_base, x18);
6627  ASSERT_EQUAL_64(src_base + 16, x19);
6628  ASSERT_EQUAL_64(dst_base + 32, x20);
6629
6630  TEARDOWN();
6631}
6632
6633
6634TEST(ldur_stur_fp) {
6635  SETUP();
6636
6637  int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef,
6638                    0x0123456789abcdef};
6639  int64_t dst[5] = {0, 0, 0, 0, 0};
6640  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6641  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
6642
6643  START();
6644  __ Mov(x17, src_base);
6645  __ Mov(x18, dst_base);
6646  __ Ldr(b0, MemOperand(x17));
6647  __ Str(b0, MemOperand(x18));
6648  __ Ldr(h1, MemOperand(x17, 1));
6649  __ Str(h1, MemOperand(x18, 1));
6650  __ Ldr(s2, MemOperand(x17, 2));
6651  __ Str(s2, MemOperand(x18, 3));
6652  __ Ldr(d3, MemOperand(x17, 3));
6653  __ Str(d3, MemOperand(x18, 7));
6654  __ Ldr(q4, MemOperand(x17, 4));
6655  __ Str(q4, MemOperand(x18, 15));
6656  END();
6657
6658  RUN();
6659
6660  ASSERT_EQUAL_128(0, 0xef, q0);
6661  ASSERT_EQUAL_128(0, 0xabcd, q1);
6662  ASSERT_EQUAL_128(0, 0x456789ab, q2);
6663  ASSERT_EQUAL_128(0, 0xabcdef0123456789, q3);
6664  ASSERT_EQUAL_128(0x89abcdef01234567, 0x89abcdef01234567, q4);
6665  ASSERT_EQUAL_64(0x89456789ababcdef, dst[0]);
6666  ASSERT_EQUAL_64(0x67abcdef01234567, dst[1]);
6667  ASSERT_EQUAL_64(0x6789abcdef012345, dst[2]);
6668  ASSERT_EQUAL_64(0x0089abcdef012345, dst[3]);
6669
6670  TEARDOWN();
6671}
6672
6673
6674TEST(ldr_literal) {
6675  SETUP();
6676
6677  START();
6678  __ Ldr(x2, 0x1234567890abcdef);
6679  __ Ldr(w3, 0xfedcba09);
6680  __ Ldrsw(x4, 0x7fffffff);
6681  __ Ldrsw(x5, 0x80000000);
6682  __ Ldr(q11, 0x1234000056780000, 0xabcd0000ef000000);
6683  __ Ldr(d13, 1.234);
6684  __ Ldr(s25, 2.5);
6685  END();
6686
6687  RUN();
6688
6689  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
6690  ASSERT_EQUAL_64(0xfedcba09, x3);
6691  ASSERT_EQUAL_64(0x7fffffff, x4);
6692  ASSERT_EQUAL_64(0xffffffff80000000, x5);
6693  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
6694  ASSERT_EQUAL_FP64(1.234, d13);
6695  ASSERT_EQUAL_FP32(2.5, s25);
6696
6697  TEARDOWN();
6698}
6699
6700
6701TEST(ldr_literal_range) {
6702  SETUP();
6703
6704  START();
6705  // Make sure the pool is empty;
6706  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
6707  ASSERT_LITERAL_POOL_SIZE(0);
6708
6709  // Create some literal pool entries.
6710  __ Ldr(x0, 0x1234567890abcdef);
6711  __ Ldr(w1, 0xfedcba09);
6712  __ Ldrsw(x2, 0x7fffffff);
6713  __ Ldrsw(x3, 0x80000000);
6714  __ Ldr(q2, 0x1234000056780000, 0xabcd0000ef000000);
6715  __ Ldr(d0, 1.234);
6716  __ Ldr(s1, 2.5);
6717  ASSERT_LITERAL_POOL_SIZE(48);
6718
6719  // Emit more code than the maximum literal load range to ensure the pool
6720  // should be emitted.
6721  const ptrdiff_t end = masm.CursorOffset() + 2 * kMaxLoadLiteralRange;
6722  while (masm.CursorOffset() < end) {
6723    __ Nop();
6724  }
6725
6726  // The pool should have been emitted.
6727  ASSERT_LITERAL_POOL_SIZE(0);
6728
6729  // These loads should be after the pool (and will require a new one).
6730  __ Ldr(x4, 0x34567890abcdef12);
6731  __ Ldr(w5, 0xdcba09fe);
6732  __ Ldrsw(x6, 0x7fffffff);
6733  __ Ldrsw(x7, 0x80000000);
6734  __ Ldr(q6, 0x1234000056780000, 0xabcd0000ef000000);
6735  __ Ldr(d4, 123.4);
6736  __ Ldr(s5, 250.0);
6737  ASSERT_LITERAL_POOL_SIZE(48);
6738  END();
6739
6740  RUN();
6741
6742  // Check that the literals loaded correctly.
6743  ASSERT_EQUAL_64(0x1234567890abcdef, x0);
6744  ASSERT_EQUAL_64(0xfedcba09, x1);
6745  ASSERT_EQUAL_64(0x7fffffff, x2);
6746  ASSERT_EQUAL_64(0xffffffff80000000, x3);
6747  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q2);
6748  ASSERT_EQUAL_FP64(1.234, d0);
6749  ASSERT_EQUAL_FP32(2.5, s1);
6750  ASSERT_EQUAL_64(0x34567890abcdef12, x4);
6751  ASSERT_EQUAL_64(0xdcba09fe, x5);
6752  ASSERT_EQUAL_64(0x7fffffff, x6);
6753  ASSERT_EQUAL_64(0xffffffff80000000, x7);
6754  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q6);
6755  ASSERT_EQUAL_FP64(123.4, d4);
6756  ASSERT_EQUAL_FP32(250.0, s5);
6757
6758  TEARDOWN();
6759}
6760
6761
6762TEST(ldr_literal_values_q) {
6763  SETUP();
6764
6765  static const uint64_t kHalfValues[] = {
6766    0x8000000000000000, 0x7fffffffffffffff, 0x0000000000000000,
6767    0xffffffffffffffff, 0x00ff00ff00ff00ff, 0x1234567890abcdef
6768  };
6769  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
6770  const Register& ref_low64 = x1;
6771  const Register& ref_high64 = x2;
6772  const Register& loaded_low64 = x3;
6773  const Register& loaded_high64 = x4;
6774  const VRegister& tgt = q0;
6775
6776  START();
6777  __ Mov(x0, 0);
6778
6779  for (int i = 0; i < card; i++) {
6780    __ Mov(ref_low64, kHalfValues[i]);
6781    for (int j = 0; j < card; j++) {
6782      __ Mov(ref_high64, kHalfValues[j]);
6783      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
6784      __ Mov(loaded_low64, tgt.V2D(), 0);
6785      __ Mov(loaded_high64, tgt.V2D(), 1);
6786      __ Cmp(loaded_low64, ref_low64);
6787      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
6788      __ Cset(x0, ne);
6789    }
6790  }
6791  END();
6792
6793  RUN();
6794
6795  // If one of the values differs, the trace can be used to identify which one.
6796  ASSERT_EQUAL_64(0, x0);
6797
6798  TEARDOWN();
6799}
6800
6801
6802template <typename T>
6803void LoadIntValueHelper(T values[], int card) {
6804  SETUP();
6805
6806  const bool is_32bits = (sizeof(T) == 4);
6807  const Register& tgt1 = is_32bits ? w1 : x1;
6808  const Register& tgt2 = is_32bits ? w2 : x2;
6809
6810  START();
6811  __ Mov(x0, 0);
6812
6813  // If one of the values differ then x0 will be one.
6814  for (int i = 0; i < card; ++i) {
6815    __ Mov(tgt1, values[i]);
6816    __ Ldr(tgt2, values[i]);
6817    __ Cmp(tgt1, tgt2);
6818    __ Cset(x0, ne);
6819  }
6820  END();
6821
6822  RUN();
6823
6824  // If one of the values differs, the trace can be used to identify which one.
6825  ASSERT_EQUAL_64(0, x0);
6826
6827  TEARDOWN();
6828}
6829
6830
6831TEST(ldr_literal_values_x) {
6832  static const uint64_t kValues[] = {
6833    0x8000000000000000, 0x7fffffffffffffff, 0x0000000000000000,
6834    0xffffffffffffffff, 0x00ff00ff00ff00ff, 0x1234567890abcdef
6835  };
6836
6837  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
6838}
6839
6840
6841TEST(ldr_literal_values_w) {
6842  static const uint32_t kValues[] = {
6843    0x80000000, 0x7fffffff, 0x00000000, 0xffffffff, 0x00ff00ff, 0x12345678,
6844    0x90abcdef
6845  };
6846
6847  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
6848}
6849
6850
6851template <typename T>
6852void LoadFPValueHelper(T values[], int card) {
6853  SETUP();
6854
6855  const bool is_32bits = (sizeof(T) == 4);
6856  const FPRegister& fp_tgt = is_32bits ? s2 : d2;
6857  const Register& tgt1 = is_32bits ? w1 : x1;
6858  const Register& tgt2 = is_32bits ? w2 : x2;
6859
6860  START();
6861  __ Mov(x0, 0);
6862
6863  // If one of the values differ then x0 will be one.
6864  for (int i = 0; i < card; ++i) {
6865    __ Mov(tgt1, is_32bits ? float_to_rawbits(values[i])
6866                           : double_to_rawbits(values[i]));
6867    __ Ldr(fp_tgt, values[i]);
6868    __ Fmov(tgt2, fp_tgt);
6869    __ Cmp(tgt1, tgt2);
6870    __ Cset(x0, ne);
6871  }
6872  END();
6873
6874  RUN();
6875
6876  // If one of the values differs, the trace can be used to identify which one.
6877  ASSERT_EQUAL_64(0, x0);
6878
6879  TEARDOWN();
6880}
6881
6882TEST(ldr_literal_values_d) {
6883  static const double kValues[] = {
6884    -0.0, 0.0, -1.0, 1.0, -1e10, 1e10
6885  };
6886
6887  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
6888}
6889
6890
6891TEST(ldr_literal_values_s) {
6892  static const float kValues[] = {
6893    -0.0, 0.0, -1.0, 1.0, -1e10, 1e10
6894  };
6895
6896  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
6897}
6898
6899
6900TEST(ldr_literal_custom) {
6901  SETUP();
6902  ALLOW_ASM();
6903
6904  Label end_of_pool_before;
6905  Label end_of_pool_after;
6906  Literal<uint64_t> before_x(0x1234567890abcdef);
6907  Literal<uint32_t> before_w(0xfedcba09);
6908  Literal<uint32_t> before_sx(0x80000000);
6909  Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
6910  Literal<double> before_d(1.234);
6911  Literal<float> before_s(2.5);
6912  Literal<uint64_t> after_x(0x1234567890abcdef);
6913  Literal<uint32_t> after_w(0xfedcba09);
6914  Literal<uint32_t> after_sx(0x80000000);
6915  Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
6916  Literal<double> after_d(1.234);
6917  Literal<float> after_s(2.5);
6918
6919  START();
6920
6921  // Manually generate a pool.
6922  __ B(&end_of_pool_before);
6923  __ place(&before_x);
6924  __ place(&before_w);
6925  __ place(&before_sx);
6926  __ place(&before_q);
6927  __ place(&before_d);
6928  __ place(&before_s);
6929  __ Bind(&end_of_pool_before);
6930
6931  __ ldr(x2, &before_x);
6932  __ ldr(w3, &before_w);
6933  __ ldrsw(x5, &before_sx);
6934  __ ldr(q11, &before_q);
6935  __ ldr(d13, &before_d);
6936  __ ldr(s25, &before_s);
6937
6938  __ ldr(x6, &after_x);
6939  __ ldr(w7, &after_w);
6940  __ ldrsw(x8, &after_sx);
6941  __ ldr(q18, &after_q);
6942  __ ldr(d14, &after_d);
6943  __ ldr(s26, &after_s);
6944
6945  // Manually generate a pool.
6946  __ B(&end_of_pool_after);
6947  __ place(&after_x);
6948  __ place(&after_w);
6949  __ place(&after_sx);
6950  __ place(&after_q);
6951  __ place(&after_d);
6952  __ place(&after_s);
6953  __ Bind(&end_of_pool_after);
6954
6955  END();
6956
6957  RUN();
6958
6959  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
6960  ASSERT_EQUAL_64(0xfedcba09, x3);
6961  ASSERT_EQUAL_64(0xffffffff80000000, x5);
6962  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
6963  ASSERT_EQUAL_FP64(1.234, d13);
6964  ASSERT_EQUAL_FP32(2.5, s25);
6965
6966  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
6967  ASSERT_EQUAL_64(0xfedcba09, x7);
6968  ASSERT_EQUAL_64(0xffffffff80000000, x8);
6969  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
6970  ASSERT_EQUAL_FP64(1.234, d14);
6971  ASSERT_EQUAL_FP32(2.5, s26);
6972
6973  TEARDOWN();
6974}
6975
6976
6977TEST(ldr_literal_custom_shared) {
6978  SETUP();
6979  ALLOW_ASM();
6980
6981  Label end_of_pool_before;
6982  Label end_of_pool_after;
6983  Literal<uint64_t> before_x(0x1234567890abcdef);
6984  Literal<uint32_t> before_w(0xfedcba09);
6985  Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
6986  Literal<double> before_d(1.234);
6987  Literal<float> before_s(2.5);
6988  Literal<uint64_t> after_x(0x1234567890abcdef);
6989  Literal<uint32_t> after_w(0xfedcba09);
6990  Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
6991  Literal<double> after_d(1.234);
6992  Literal<float> after_s(2.5);
6993
6994  START();
6995
6996  // Manually generate a pool.
6997  __ B(&end_of_pool_before);
6998  __ place(&before_x);
6999  __ place(&before_w);
7000  __ place(&before_q);
7001  __ place(&before_d);
7002  __ place(&before_s);
7003  __ Bind(&end_of_pool_before);
7004
7005  // Load the entries several times to test that literals can be shared.
7006  for (int i = 0; i < 50; i++) {
7007    __ ldr(x2, &before_x);
7008    __ ldr(w3, &before_w);
7009    __ ldrsw(x5, &before_w);    // Re-use before_w.
7010    __ ldr(q11, &before_q);
7011    __ ldr(d13, &before_d);
7012    __ ldr(s25, &before_s);
7013
7014    __ ldr(x6, &after_x);
7015    __ ldr(w7, &after_w);
7016    __ ldrsw(x8, &after_w);     // Re-use after_w.
7017    __ ldr(q18, &after_q);
7018    __ ldr(d14, &after_d);
7019    __ ldr(s26, &after_s);
7020  }
7021
7022  // Manually generate a pool.
7023  __ B(&end_of_pool_after);
7024  __ place(&after_x);
7025  __ place(&after_w);
7026  __ place(&after_q);
7027  __ place(&after_d);
7028  __ place(&after_s);
7029  __ Bind(&end_of_pool_after);
7030
7031  END();
7032
7033  RUN();
7034
7035  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
7036  ASSERT_EQUAL_64(0xfedcba09, x3);
7037  ASSERT_EQUAL_64(0xfffffffffedcba09, x5);
7038  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
7039  ASSERT_EQUAL_FP64(1.234, d13);
7040  ASSERT_EQUAL_FP32(2.5, s25);
7041
7042  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
7043  ASSERT_EQUAL_64(0xfedcba09, x7);
7044  ASSERT_EQUAL_64(0xfffffffffedcba09, x8);
7045  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
7046  ASSERT_EQUAL_FP64(1.234, d14);
7047  ASSERT_EQUAL_FP32(2.5, s26);
7048
7049  TEARDOWN();
7050}
7051
7052
7053TEST(prfm_offset) {
7054  SETUP();
7055
7056  START();
7057  // The address used in prfm doesn't have to be valid.
7058  __ Mov(x0, 0x0123456789abcdef);
7059
7060  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
7061    // Unallocated prefetch operations are ignored, so test all of them.
7062    PrefetchOperation op = static_cast<PrefetchOperation>(i);
7063
7064    __ Prfm(op, MemOperand(x0));
7065    __ Prfm(op, MemOperand(x0, 8));
7066    __ Prfm(op, MemOperand(x0, 32760));
7067    __ Prfm(op, MemOperand(x0, 32768));
7068
7069    __ Prfm(op, MemOperand(x0, 1));
7070    __ Prfm(op, MemOperand(x0, 9));
7071    __ Prfm(op, MemOperand(x0, 255));
7072    __ Prfm(op, MemOperand(x0, 257));
7073    __ Prfm(op, MemOperand(x0, -1));
7074    __ Prfm(op, MemOperand(x0, -9));
7075    __ Prfm(op, MemOperand(x0, -255));
7076    __ Prfm(op, MemOperand(x0, -257));
7077
7078    __ Prfm(op, MemOperand(x0, 0xfedcba9876543210));
7079  }
7080
7081  END();
7082  RUN();
7083  TEARDOWN();
7084}
7085
7086
7087TEST(prfm_regoffset) {
7088  SETUP();
7089
7090  START();
7091  // The address used in prfm doesn't have to be valid.
7092  __ Mov(x0, 0x0123456789abcdef);
7093
7094  CPURegList inputs(CPURegister::kRegister, kXRegSize, 10, 18);
7095  __ Mov(x10, 0);
7096  __ Mov(x11, 1);
7097  __ Mov(x12, 8);
7098  __ Mov(x13, 255);
7099  __ Mov(x14, -0);
7100  __ Mov(x15, -1);
7101  __ Mov(x16, -8);
7102  __ Mov(x17, -255);
7103  __ Mov(x18, 0xfedcba9876543210);
7104
7105  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
7106    // Unallocated prefetch operations are ignored, so test all of them.
7107    PrefetchOperation op = static_cast<PrefetchOperation>(i);
7108
7109    CPURegList loop = inputs;
7110    while (!loop.IsEmpty()) {
7111      Register input(loop.PopLowestIndex());
7112      __ Prfm(op, MemOperand(x0, input));
7113      __ Prfm(op, MemOperand(x0, input, UXTW));
7114      __ Prfm(op, MemOperand(x0, input, UXTW, 3));
7115      __ Prfm(op, MemOperand(x0, input, LSL));
7116      __ Prfm(op, MemOperand(x0, input, LSL, 3));
7117      __ Prfm(op, MemOperand(x0, input, SXTW));
7118      __ Prfm(op, MemOperand(x0, input, SXTW, 3));
7119      __ Prfm(op, MemOperand(x0, input, SXTX));
7120      __ Prfm(op, MemOperand(x0, input, SXTX, 3));
7121    }
7122  }
7123
7124  END();
7125  RUN();
7126  TEARDOWN();
7127}
7128
7129
7130TEST(prfm_literal_imm19) {
7131  SETUP();
7132  ALLOW_ASM();
7133  START();
7134
7135  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
7136    // Unallocated prefetch operations are ignored, so test all of them.
7137    PrefetchOperation op = static_cast<PrefetchOperation>(i);
7138
7139    // The address used in prfm doesn't have to be valid.
7140    __ prfm(op, 0);
7141    __ prfm(op, 1);
7142    __ prfm(op, -1);
7143    __ prfm(op, 1000);
7144    __ prfm(op, -1000);
7145    __ prfm(op, 0x3ffff);
7146    __ prfm(op, -0x40000);
7147  }
7148
7149  END();
7150  RUN();
7151  TEARDOWN();
7152}
7153
7154
7155TEST(prfm_literal) {
7156  SETUP();
7157  ALLOW_ASM();
7158
7159  Label end_of_pool_before;
7160  Label end_of_pool_after;
7161  Literal<uint64_t> before(0);
7162  Literal<uint64_t> after(0);
7163
7164  START();
7165
7166  // Manually generate a pool.
7167  __ B(&end_of_pool_before);
7168  __ place(&before);
7169  __ Bind(&end_of_pool_before);
7170
7171  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
7172    // Unallocated prefetch operations are ignored, so test all of them.
7173    PrefetchOperation op = static_cast<PrefetchOperation>(i);
7174
7175    CodeBufferCheckScope guard(&masm, 2 * kInstructionSize);
7176    __ prfm(op, &before);
7177    __ prfm(op, &after);
7178  }
7179
7180  // Manually generate a pool.
7181  __ B(&end_of_pool_after);
7182  __ place(&after);
7183  __ Bind(&end_of_pool_after);
7184
7185  END();
7186  RUN();
7187  TEARDOWN();
7188}
7189
7190
7191TEST(prfm_wide) {
7192  SETUP();
7193
7194  START();
7195  // The address used in prfm doesn't have to be valid.
7196  __ Mov(x0, 0x0123456789abcdef);
7197
7198  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
7199    // Unallocated prefetch operations are ignored, so test all of them.
7200    PrefetchOperation op = static_cast<PrefetchOperation>(i);
7201
7202    __ Prfm(op, MemOperand(x0, 0x40000));
7203    __ Prfm(op, MemOperand(x0, -0x40001));
7204    __ Prfm(op, MemOperand(x0, UINT64_C(0x5555555555555555)));
7205    __ Prfm(op, MemOperand(x0, UINT64_C(0xfedcba9876543210)));
7206  }
7207
7208  END();
7209  RUN();
7210  TEARDOWN();
7211}
7212
7213
7214TEST(load_prfm_literal) {
7215  // Test literals shared between both prfm and ldr.
7216  SETUP();
7217  ALLOW_ASM();
7218
7219  Label end_of_pool_before;
7220  Label end_of_pool_after;
7221  Literal<uint64_t> before_x(0x1234567890abcdef);
7222  Literal<uint32_t> before_w(0xfedcba09);
7223  Literal<uint32_t> before_sx(0x80000000);
7224  Literal<double> before_d(1.234);
7225  Literal<float> before_s(2.5);
7226  Literal<uint64_t> after_x(0x1234567890abcdef);
7227  Literal<uint32_t> after_w(0xfedcba09);
7228  Literal<uint32_t> after_sx(0x80000000);
7229  Literal<double> after_d(1.234);
7230  Literal<float> after_s(2.5);
7231
7232  START();
7233
7234  // Manually generate a pool.
7235  __ B(&end_of_pool_before);
7236  __ place(&before_x);
7237  __ place(&before_w);
7238  __ place(&before_sx);
7239  __ place(&before_d);
7240  __ place(&before_s);
7241  __ Bind(&end_of_pool_before);
7242
7243  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
7244    // Unallocated prefetch operations are ignored, so test all of them.
7245    PrefetchOperation op = static_cast<PrefetchOperation>(i);
7246
7247    __ prfm(op, &before_x);
7248    __ prfm(op, &before_w);
7249    __ prfm(op, &before_sx);
7250    __ prfm(op, &before_d);
7251    __ prfm(op, &before_s);
7252
7253    __ prfm(op, &after_x);
7254    __ prfm(op, &after_w);
7255    __ prfm(op, &after_sx);
7256    __ prfm(op, &after_d);
7257    __ prfm(op, &after_s);
7258  }
7259
7260  __ ldr(x2, &before_x);
7261  __ ldr(w3, &before_w);
7262  __ ldrsw(x5, &before_sx);
7263  __ ldr(d13, &before_d);
7264  __ ldr(s25, &before_s);
7265
7266  __ ldr(x6, &after_x);
7267  __ ldr(w7, &after_w);
7268  __ ldrsw(x8, &after_sx);
7269  __ ldr(d14, &after_d);
7270  __ ldr(s26, &after_s);
7271
7272  // Manually generate a pool.
7273  __ B(&end_of_pool_after);
7274  __ place(&after_x);
7275  __ place(&after_w);
7276  __ place(&after_sx);
7277  __ place(&after_d);
7278  __ place(&after_s);
7279  __ Bind(&end_of_pool_after);
7280
7281  END();
7282
7283  RUN();
7284
7285  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
7286  ASSERT_EQUAL_64(0xfedcba09, x3);
7287  ASSERT_EQUAL_64(0xffffffff80000000, x5);
7288  ASSERT_EQUAL_FP64(1.234, d13);
7289  ASSERT_EQUAL_FP32(2.5, s25);
7290
7291  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
7292  ASSERT_EQUAL_64(0xfedcba09, x7);
7293  ASSERT_EQUAL_64(0xffffffff80000000, x8);
7294  ASSERT_EQUAL_FP64(1.234, d14);
7295  ASSERT_EQUAL_FP32(2.5, s26);
7296
7297  TEARDOWN();
7298}
7299
7300
7301TEST(add_sub_imm) {
7302  SETUP();
7303
7304  START();
7305  __ Mov(x0, 0x0);
7306  __ Mov(x1, 0x1111);
7307  __ Mov(x2, 0xffffffffffffffff);
7308  __ Mov(x3, 0x8000000000000000);
7309
7310  __ Add(x10, x0, Operand(0x123));
7311  __ Add(x11, x1, Operand(0x122000));
7312  __ Add(x12, x0, Operand(0xabc << 12));
7313  __ Add(x13, x2, Operand(1));
7314
7315  __ Add(w14, w0, Operand(0x123));
7316  __ Add(w15, w1, Operand(0x122000));
7317  __ Add(w16, w0, Operand(0xabc << 12));
7318  __ Add(w17, w2, Operand(1));
7319
7320  __ Sub(x20, x0, Operand(0x1));
7321  __ Sub(x21, x1, Operand(0x111));
7322  __ Sub(x22, x1, Operand(0x1 << 12));
7323  __ Sub(x23, x3, Operand(1));
7324
7325  __ Sub(w24, w0, Operand(0x1));
7326  __ Sub(w25, w1, Operand(0x111));
7327  __ Sub(w26, w1, Operand(0x1 << 12));
7328  __ Sub(w27, w3, Operand(1));
7329  END();
7330
7331  RUN();
7332
7333  ASSERT_EQUAL_64(0x123, x10);
7334  ASSERT_EQUAL_64(0x123111, x11);
7335  ASSERT_EQUAL_64(0xabc000, x12);
7336  ASSERT_EQUAL_64(0x0, x13);
7337
7338  ASSERT_EQUAL_32(0x123, w14);
7339  ASSERT_EQUAL_32(0x123111, w15);
7340  ASSERT_EQUAL_32(0xabc000, w16);
7341  ASSERT_EQUAL_32(0x0, w17);
7342
7343  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
7344  ASSERT_EQUAL_64(0x1000, x21);
7345  ASSERT_EQUAL_64(0x111, x22);
7346  ASSERT_EQUAL_64(0x7fffffffffffffff, x23);
7347
7348  ASSERT_EQUAL_32(0xffffffff, w24);
7349  ASSERT_EQUAL_32(0x1000, w25);
7350  ASSERT_EQUAL_32(0x111, w26);
7351  ASSERT_EQUAL_32(0xffffffff, w27);
7352
7353  TEARDOWN();
7354}
7355
7356
7357TEST(add_sub_wide_imm) {
7358  SETUP();
7359
7360  START();
7361  __ Mov(x0, 0x0);
7362  __ Mov(x1, 0x1);
7363
7364  __ Add(x10, x0, Operand(0x1234567890abcdef));
7365  __ Add(x11, x1, Operand(0xffffffff));
7366
7367  __ Add(w12, w0, Operand(0x12345678));
7368  __ Add(w13, w1, Operand(0xffffffff));
7369
7370  __ Add(w18, w0, Operand(kWMinInt));
7371  __ Sub(w19, w0, Operand(kWMinInt));
7372
7373  __ Sub(x20, x0, Operand(0x1234567890abcdef));
7374  __ Sub(w21, w0, Operand(0x12345678));
7375
7376  END();
7377
7378  RUN();
7379
7380  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
7381  ASSERT_EQUAL_64(0x100000000, x11);
7382
7383  ASSERT_EQUAL_32(0x12345678, w12);
7384  ASSERT_EQUAL_64(0x0, x13);
7385
7386  ASSERT_EQUAL_32(kWMinInt, w18);
7387  ASSERT_EQUAL_32(kWMinInt, w19);
7388
7389  ASSERT_EQUAL_64(-0x1234567890abcdef, x20);
7390  ASSERT_EQUAL_32(-0x12345678, w21);
7391
7392  TEARDOWN();
7393}
7394
7395
7396TEST(add_sub_shifted) {
7397  SETUP();
7398
7399  START();
7400  __ Mov(x0, 0);
7401  __ Mov(x1, 0x0123456789abcdef);
7402  __ Mov(x2, 0xfedcba9876543210);
7403  __ Mov(x3, 0xffffffffffffffff);
7404
7405  __ Add(x10, x1, Operand(x2));
7406  __ Add(x11, x0, Operand(x1, LSL, 8));
7407  __ Add(x12, x0, Operand(x1, LSR, 8));
7408  __ Add(x13, x0, Operand(x1, ASR, 8));
7409  __ Add(x14, x0, Operand(x2, ASR, 8));
7410  __ Add(w15, w0, Operand(w1, ASR, 8));
7411  __ Add(w18, w3, Operand(w1, ROR, 8));
7412  __ Add(x19, x3, Operand(x1, ROR, 8));
7413
7414  __ Sub(x20, x3, Operand(x2));
7415  __ Sub(x21, x3, Operand(x1, LSL, 8));
7416  __ Sub(x22, x3, Operand(x1, LSR, 8));
7417  __ Sub(x23, x3, Operand(x1, ASR, 8));
7418  __ Sub(x24, x3, Operand(x2, ASR, 8));
7419  __ Sub(w25, w3, Operand(w1, ASR, 8));
7420  __ Sub(w26, w3, Operand(w1, ROR, 8));
7421  __ Sub(x27, x3, Operand(x1, ROR, 8));
7422  END();
7423
7424  RUN();
7425
7426  ASSERT_EQUAL_64(0xffffffffffffffff, x10);
7427  ASSERT_EQUAL_64(0x23456789abcdef00, x11);
7428  ASSERT_EQUAL_64(0x000123456789abcd, x12);
7429  ASSERT_EQUAL_64(0x000123456789abcd, x13);
7430  ASSERT_EQUAL_64(0xfffedcba98765432, x14);
7431  ASSERT_EQUAL_64(0xff89abcd, x15);
7432  ASSERT_EQUAL_64(0xef89abcc, x18);
7433  ASSERT_EQUAL_64(0xef0123456789abcc, x19);
7434
7435  ASSERT_EQUAL_64(0x0123456789abcdef, x20);
7436  ASSERT_EQUAL_64(0xdcba9876543210ff, x21);
7437  ASSERT_EQUAL_64(0xfffedcba98765432, x22);
7438  ASSERT_EQUAL_64(0xfffedcba98765432, x23);
7439  ASSERT_EQUAL_64(0x000123456789abcd, x24);
7440  ASSERT_EQUAL_64(0x00765432, x25);
7441  ASSERT_EQUAL_64(0x10765432, x26);
7442  ASSERT_EQUAL_64(0x10fedcba98765432, x27);
7443
7444  TEARDOWN();
7445}
7446
7447
7448TEST(add_sub_extended) {
7449  SETUP();
7450
7451  START();
7452  __ Mov(x0, 0);
7453  __ Mov(x1, 0x0123456789abcdef);
7454  __ Mov(x2, 0xfedcba9876543210);
7455  __ Mov(w3, 0x80);
7456
7457  __ Add(x10, x0, Operand(x1, UXTB, 0));
7458  __ Add(x11, x0, Operand(x1, UXTB, 1));
7459  __ Add(x12, x0, Operand(x1, UXTH, 2));
7460  __ Add(x13, x0, Operand(x1, UXTW, 4));
7461
7462  __ Add(x14, x0, Operand(x1, SXTB, 0));
7463  __ Add(x15, x0, Operand(x1, SXTB, 1));
7464  __ Add(x16, x0, Operand(x1, SXTH, 2));
7465  __ Add(x17, x0, Operand(x1, SXTW, 3));
7466  __ Add(x18, x0, Operand(x2, SXTB, 0));
7467  __ Add(x19, x0, Operand(x2, SXTB, 1));
7468  __ Add(x20, x0, Operand(x2, SXTH, 2));
7469  __ Add(x21, x0, Operand(x2, SXTW, 3));
7470
7471  __ Add(x22, x1, Operand(x2, SXTB, 1));
7472  __ Sub(x23, x1, Operand(x2, SXTB, 1));
7473
7474  __ Add(w24, w1, Operand(w2, UXTB, 2));
7475  __ Add(w25, w0, Operand(w1, SXTB, 0));
7476  __ Add(w26, w0, Operand(w1, SXTB, 1));
7477  __ Add(w27, w2, Operand(w1, SXTW, 3));
7478
7479  __ Add(w28, w0, Operand(w1, SXTW, 3));
7480  __ Add(x29, x0, Operand(w1, SXTW, 3));
7481
7482  __ Sub(x30, x0, Operand(w3, SXTB, 1));
7483  END();
7484
7485  RUN();
7486
7487  ASSERT_EQUAL_64(0xef, x10);
7488  ASSERT_EQUAL_64(0x1de, x11);
7489  ASSERT_EQUAL_64(0x337bc, x12);
7490  ASSERT_EQUAL_64(0x89abcdef0, x13);
7491
7492  ASSERT_EQUAL_64(0xffffffffffffffef, x14);
7493  ASSERT_EQUAL_64(0xffffffffffffffde, x15);
7494  ASSERT_EQUAL_64(0xffffffffffff37bc, x16);
7495  ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x17);
7496  ASSERT_EQUAL_64(0x10, x18);
7497  ASSERT_EQUAL_64(0x20, x19);
7498  ASSERT_EQUAL_64(0xc840, x20);
7499  ASSERT_EQUAL_64(0x3b2a19080, x21);
7500
7501  ASSERT_EQUAL_64(0x0123456789abce0f, x22);
7502  ASSERT_EQUAL_64(0x0123456789abcdcf, x23);
7503
7504  ASSERT_EQUAL_32(0x89abce2f, w24);
7505  ASSERT_EQUAL_32(0xffffffef, w25);
7506  ASSERT_EQUAL_32(0xffffffde, w26);
7507  ASSERT_EQUAL_32(0xc3b2a188, w27);
7508
7509  ASSERT_EQUAL_32(0x4d5e6f78, w28);
7510  ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x29);
7511
7512  ASSERT_EQUAL_64(256, x30);
7513
7514  TEARDOWN();
7515}
7516
7517
7518TEST(add_sub_negative) {
7519  SETUP();
7520
7521  START();
7522  __ Mov(x0, 0);
7523  __ Mov(x1, 4687);
7524  __ Mov(x2, 0x1122334455667788);
7525  __ Mov(w3, 0x11223344);
7526  __ Mov(w4, 400000);
7527
7528  __ Add(x10, x0, -42);
7529  __ Add(x11, x1, -687);
7530  __ Add(x12, x2, -0x88);
7531
7532  __ Sub(x13, x0, -600);
7533  __ Sub(x14, x1, -313);
7534  __ Sub(x15, x2, -0x555);
7535
7536  __ Add(w19, w3, -0x344);
7537  __ Add(w20, w4, -2000);
7538
7539  __ Sub(w21, w3, -0xbc);
7540  __ Sub(w22, w4, -2000);
7541  END();
7542
7543  RUN();
7544
7545  ASSERT_EQUAL_64(-42, x10);
7546  ASSERT_EQUAL_64(4000, x11);
7547  ASSERT_EQUAL_64(0x1122334455667700, x12);
7548
7549  ASSERT_EQUAL_64(600, x13);
7550  ASSERT_EQUAL_64(5000, x14);
7551  ASSERT_EQUAL_64(0x1122334455667cdd, x15);
7552
7553  ASSERT_EQUAL_32(0x11223000, w19);
7554  ASSERT_EQUAL_32(398000, w20);
7555
7556  ASSERT_EQUAL_32(0x11223400, w21);
7557  ASSERT_EQUAL_32(402000, w22);
7558
7559  TEARDOWN();
7560}
7561
7562
7563TEST(add_sub_zero) {
7564  SETUP();
7565
7566  START();
7567  __ Mov(x0, 0);
7568  __ Mov(x1, 0);
7569  __ Mov(x2, 0);
7570
7571  Label blob1;
7572  __ Bind(&blob1);
7573  __ Add(x0, x0, 0);
7574  __ Sub(x1, x1, 0);
7575  __ Sub(x2, x2, xzr);
7576  VIXL_CHECK(__ SizeOfCodeGeneratedSince(&blob1) == 0);
7577
7578  Label blob2;
7579  __ Bind(&blob2);
7580  __ Add(w3, w3, 0);
7581  VIXL_CHECK(__ SizeOfCodeGeneratedSince(&blob2) != 0);
7582
7583  Label blob3;
7584  __ Bind(&blob3);
7585  __ Sub(w3, w3, wzr);
7586  VIXL_CHECK(__ SizeOfCodeGeneratedSince(&blob3) != 0);
7587
7588  END();
7589
7590  RUN();
7591
7592  ASSERT_EQUAL_64(0, x0);
7593  ASSERT_EQUAL_64(0, x1);
7594  ASSERT_EQUAL_64(0, x2);
7595
7596  TEARDOWN();
7597}
7598
7599
7600TEST(claim_drop_zero) {
7601  SETUP();
7602
7603  START();
7604
7605  Label start;
7606  __ Bind(&start);
7607  __ Claim(Operand(0));
7608  __ Drop(Operand(0));
7609  __ Claim(Operand(xzr));
7610  __ Drop(Operand(xzr));
7611  VIXL_CHECK(__ SizeOfCodeGeneratedSince(&start) == 0);
7612
7613  END();
7614
7615  RUN();
7616
7617  TEARDOWN();
7618}
7619
7620
7621TEST(neg) {
7622  SETUP();
7623
7624  START();
7625  __ Mov(x0, 0xf123456789abcdef);
7626
7627  // Immediate.
7628  __ Neg(x1, 0x123);
7629  __ Neg(w2, 0x123);
7630
7631  // Shifted.
7632  __ Neg(x3, Operand(x0, LSL, 1));
7633  __ Neg(w4, Operand(w0, LSL, 2));
7634  __ Neg(x5, Operand(x0, LSR, 3));
7635  __ Neg(w6, Operand(w0, LSR, 4));
7636  __ Neg(x7, Operand(x0, ASR, 5));
7637  __ Neg(w8, Operand(w0, ASR, 6));
7638
7639  // Extended.
7640  __ Neg(w9, Operand(w0, UXTB));
7641  __ Neg(x10, Operand(x0, SXTB, 1));
7642  __ Neg(w11, Operand(w0, UXTH, 2));
7643  __ Neg(x12, Operand(x0, SXTH, 3));
7644  __ Neg(w13, Operand(w0, UXTW, 4));
7645  __ Neg(x14, Operand(x0, SXTW, 4));
7646  END();
7647
7648  RUN();
7649
7650  ASSERT_EQUAL_64(0xfffffffffffffedd, x1);
7651  ASSERT_EQUAL_64(0xfffffedd, x2);
7652  ASSERT_EQUAL_64(0x1db97530eca86422, x3);
7653  ASSERT_EQUAL_64(0xd950c844, x4);
7654  ASSERT_EQUAL_64(0xe1db97530eca8643, x5);
7655  ASSERT_EQUAL_64(0xf7654322, x6);
7656  ASSERT_EQUAL_64(0x0076e5d4c3b2a191, x7);
7657  ASSERT_EQUAL_64(0x01d950c9, x8);
7658  ASSERT_EQUAL_64(0xffffff11, x9);
7659  ASSERT_EQUAL_64(0x0000000000000022, x10);
7660  ASSERT_EQUAL_64(0xfffcc844, x11);
7661  ASSERT_EQUAL_64(0x0000000000019088, x12);
7662  ASSERT_EQUAL_64(0x65432110, x13);
7663  ASSERT_EQUAL_64(0x0000000765432110, x14);
7664
7665  TEARDOWN();
7666}
7667
7668
7669template<typename T, typename Op>
7670static void AdcsSbcsHelper(Op op, T left, T right, int carry,
7671                           T expected, StatusFlags expected_flags) {
7672  int reg_size = sizeof(T) * 8;
7673  Register left_reg(0, reg_size);
7674  Register right_reg(1, reg_size);
7675  Register result_reg(2, reg_size);
7676
7677  SETUP();
7678  START();
7679
7680  __ Mov(left_reg, left);
7681  __ Mov(right_reg, right);
7682  __ Mov(x10, (carry ? CFlag : NoFlag));
7683
7684  __ Msr(NZCV, x10);
7685  (masm.*op)(result_reg, left_reg, right_reg);
7686
7687  END();
7688  RUN();
7689
7690  ASSERT_EQUAL_64(left, left_reg.X());
7691  ASSERT_EQUAL_64(right, right_reg.X());
7692  ASSERT_EQUAL_64(expected, result_reg.X());
7693  ASSERT_EQUAL_NZCV(expected_flags);
7694
7695  TEARDOWN();
7696}
7697
7698
7699TEST(adcs_sbcs_x) {
7700  uint64_t inputs[] = {
7701    0x0000000000000000, 0x0000000000000001,
7702    0x7ffffffffffffffe, 0x7fffffffffffffff,
7703    0x8000000000000000, 0x8000000000000001,
7704    0xfffffffffffffffe, 0xffffffffffffffff,
7705  };
7706  static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);
7707
7708  struct Expected {
7709    uint64_t carry0_result;
7710    StatusFlags carry0_flags;
7711    uint64_t carry1_result;
7712    StatusFlags carry1_flags;
7713  };
7714
7715  static const Expected expected_adcs_x[input_count][input_count] = {
7716    {{0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag},
7717     {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
7718     {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
7719     {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
7720     {0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
7721     {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
7722     {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
7723     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}},
7724    {{0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
7725     {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag},
7726     {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
7727     {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
7728     {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
7729     {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
7730     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7731     {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}},
7732    {{0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
7733     {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
7734     {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag},
7735     {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
7736     {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
7737     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7738     {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
7739     {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}},
7740    {{0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
7741     {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
7742     {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
7743     {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag},
7744     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7745     {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
7746     {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
7747     {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}},
7748    {{0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
7749     {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
7750     {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
7751     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7752     {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag},
7753     {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
7754     {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
7755     {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}},
7756    {{0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
7757     {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
7758     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7759     {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
7760     {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
7761     {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag},
7762     {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
7763     {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}},
7764    {{0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
7765     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7766     {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
7767     {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
7768     {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
7769     {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
7770     {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag},
7771     {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}},
7772    {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7773     {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
7774     {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
7775     {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
7776     {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
7777     {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
7778     {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
7779     {0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}}
7780  };
7781
7782  static const Expected expected_sbcs_x[input_count][input_count] = {
7783    {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7784     {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
7785     {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
7786     {0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
7787     {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
7788     {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
7789     {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
7790     {0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}},
7791    {{0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
7792     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7793     {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
7794     {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
7795     {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
7796     {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
7797     {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag},
7798     {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}},
7799    {{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
7800     {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
7801     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7802     {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
7803     {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
7804     {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag},
7805     {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
7806     {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}},
7807    {{0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
7808     {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
7809     {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
7810     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7811     {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag},
7812     {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
7813     {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
7814     {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}},
7815    {{0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
7816     {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
7817     {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
7818     {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag},
7819     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7820     {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
7821     {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
7822     {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}},
7823    {{0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
7824     {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
7825     {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag},
7826     {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
7827     {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
7828     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7829     {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
7830     {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}},
7831    {{0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
7832     {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag},
7833     {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
7834     {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
7835     {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
7836     {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
7837     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
7838     {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}},
7839    {{0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag},
7840     {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
7841     {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
7842     {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
7843     {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
7844     {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
7845     {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
7846     {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}
7847  };
7848
7849  for (size_t left = 0; left < input_count; left++) {
7850    for (size_t right = 0; right < input_count; right++) {
7851      const Expected & expected = expected_adcs_x[left][right];
7852      AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 0,
7853                     expected.carry0_result, expected.carry0_flags);
7854      AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 1,
7855                     expected.carry1_result, expected.carry1_flags);
7856    }
7857  }
7858
7859  for (size_t left = 0; left < input_count; left++) {
7860    for (size_t right = 0; right < input_count; right++) {
7861      const Expected & expected = expected_sbcs_x[left][right];
7862      AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 0,
7863                     expected.carry0_result, expected.carry0_flags);
7864      AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 1,
7865                     expected.carry1_result, expected.carry1_flags);
7866    }
7867  }
7868}
7869
7870
7871TEST(adcs_sbcs_w) {
7872  uint32_t inputs[] = {
7873    0x00000000, 0x00000001, 0x7ffffffe, 0x7fffffff,
7874    0x80000000, 0x80000001, 0xfffffffe, 0xffffffff,
7875  };
7876  static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);
7877
7878  struct Expected {
7879    uint32_t carry0_result;
7880    StatusFlags carry0_flags;
7881    uint32_t carry1_result;
7882    StatusFlags carry1_flags;
7883  };
7884
7885  static const Expected expected_adcs_w[input_count][input_count] = {
7886    {{0x00000000, ZFlag, 0x00000001, NoFlag},
7887     {0x00000001, NoFlag, 0x00000002, NoFlag},
7888     {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
7889     {0x7fffffff, NoFlag, 0x80000000, NVFlag},
7890     {0x80000000, NFlag, 0x80000001, NFlag},
7891     {0x80000001, NFlag, 0x80000002, NFlag},
7892     {0xfffffffe, NFlag, 0xffffffff, NFlag},
7893     {0xffffffff, NFlag, 0x00000000, ZCFlag}},
7894    {{0x00000001, NoFlag, 0x00000002, NoFlag},
7895     {0x00000002, NoFlag, 0x00000003, NoFlag},
7896     {0x7fffffff, NoFlag, 0x80000000, NVFlag},
7897     {0x80000000, NVFlag, 0x80000001, NVFlag},
7898     {0x80000001, NFlag, 0x80000002, NFlag},
7899     {0x80000002, NFlag, 0x80000003, NFlag},
7900     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7901     {0x00000000, ZCFlag, 0x00000001, CFlag}},
7902    {{0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
7903     {0x7fffffff, NoFlag, 0x80000000, NVFlag},
7904     {0xfffffffc, NVFlag, 0xfffffffd, NVFlag},
7905     {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
7906     {0xfffffffe, NFlag, 0xffffffff, NFlag},
7907     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7908     {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
7909     {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}},
7910    {{0x7fffffff, NoFlag, 0x80000000, NVFlag},
7911     {0x80000000, NVFlag, 0x80000001, NVFlag},
7912     {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
7913     {0xfffffffe, NVFlag, 0xffffffff, NVFlag},
7914     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7915     {0x00000000, ZCFlag, 0x00000001, CFlag},
7916     {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
7917     {0x7ffffffe, CFlag, 0x7fffffff, CFlag}},
7918    {{0x80000000, NFlag, 0x80000001, NFlag},
7919     {0x80000001, NFlag, 0x80000002, NFlag},
7920     {0xfffffffe, NFlag, 0xffffffff, NFlag},
7921     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7922     {0x00000000, ZCVFlag, 0x00000001, CVFlag},
7923     {0x00000001, CVFlag, 0x00000002, CVFlag},
7924     {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
7925     {0x7fffffff, CVFlag, 0x80000000, NCFlag}},
7926    {{0x80000001, NFlag, 0x80000002, NFlag},
7927     {0x80000002, NFlag, 0x80000003, NFlag},
7928     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7929     {0x00000000, ZCFlag, 0x00000001, CFlag},
7930     {0x00000001, CVFlag, 0x00000002, CVFlag},
7931     {0x00000002, CVFlag, 0x00000003, CVFlag},
7932     {0x7fffffff, CVFlag, 0x80000000, NCFlag},
7933     {0x80000000, NCFlag, 0x80000001, NCFlag}},
7934    {{0xfffffffe, NFlag, 0xffffffff, NFlag},
7935     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7936     {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
7937     {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
7938     {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
7939     {0x7fffffff, CVFlag, 0x80000000, NCFlag},
7940     {0xfffffffc, NCFlag, 0xfffffffd, NCFlag},
7941     {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}},
7942    {{0xffffffff, NFlag, 0x00000000, ZCFlag},
7943     {0x00000000, ZCFlag, 0x00000001, CFlag},
7944     {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
7945     {0x7ffffffe, CFlag, 0x7fffffff, CFlag},
7946     {0x7fffffff, CVFlag, 0x80000000, NCFlag},
7947     {0x80000000, NCFlag, 0x80000001, NCFlag},
7948     {0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
7949     {0xfffffffe, NCFlag, 0xffffffff, NCFlag}}
7950  };
7951
7952  static const Expected expected_sbcs_w[input_count][input_count] = {
7953    {{0xffffffff, NFlag, 0x00000000, ZCFlag},
7954     {0xfffffffe, NFlag, 0xffffffff, NFlag},
7955     {0x80000001, NFlag, 0x80000002, NFlag},
7956     {0x80000000, NFlag, 0x80000001, NFlag},
7957     {0x7fffffff, NoFlag, 0x80000000, NVFlag},
7958     {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
7959     {0x00000001, NoFlag, 0x00000002, NoFlag},
7960     {0x00000000, ZFlag, 0x00000001, NoFlag}},
7961    {{0x00000000, ZCFlag, 0x00000001, CFlag},
7962     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7963     {0x80000002, NFlag, 0x80000003, NFlag},
7964     {0x80000001, NFlag, 0x80000002, NFlag},
7965     {0x80000000, NVFlag, 0x80000001, NVFlag},
7966     {0x7fffffff, NoFlag, 0x80000000, NVFlag},
7967     {0x00000002, NoFlag, 0x00000003, NoFlag},
7968     {0x00000001, NoFlag, 0x00000002, NoFlag}},
7969    {{0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
7970     {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
7971     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7972     {0xfffffffe, NFlag, 0xffffffff, NFlag},
7973     {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
7974     {0xfffffffc, NVFlag, 0xfffffffd, NVFlag},
7975     {0x7fffffff, NoFlag, 0x80000000, NVFlag},
7976     {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}},
7977    {{0x7ffffffe, CFlag, 0x7fffffff, CFlag},
7978     {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
7979     {0x00000000, ZCFlag, 0x00000001, CFlag},
7980     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7981     {0xfffffffe, NVFlag, 0xffffffff, NVFlag},
7982     {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
7983     {0x80000000, NVFlag, 0x80000001, NVFlag},
7984     {0x7fffffff, NoFlag, 0x80000000, NVFlag}},
7985    {{0x7fffffff, CVFlag, 0x80000000, NCFlag},
7986     {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
7987     {0x00000001, CVFlag, 0x00000002, CVFlag},
7988     {0x00000000, ZCVFlag, 0x00000001, CVFlag},
7989     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7990     {0xfffffffe, NFlag, 0xffffffff, NFlag},
7991     {0x80000001, NFlag, 0x80000002, NFlag},
7992     {0x80000000, NFlag, 0x80000001, NFlag}},
7993    {{0x80000000, NCFlag, 0x80000001, NCFlag},
7994     {0x7fffffff, CVFlag, 0x80000000, NCFlag},
7995     {0x00000002, CVFlag, 0x00000003, CVFlag},
7996     {0x00000001, CVFlag, 0x00000002, CVFlag},
7997     {0x00000000, ZCFlag, 0x00000001, CFlag},
7998     {0xffffffff, NFlag, 0x00000000, ZCFlag},
7999     {0x80000002, NFlag, 0x80000003, NFlag},
8000     {0x80000001, NFlag, 0x80000002, NFlag}},
8001    {{0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
8002     {0xfffffffc, NCFlag, 0xfffffffd, NCFlag},
8003     {0x7fffffff, CVFlag, 0x80000000, NCFlag},
8004     {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
8005     {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
8006     {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
8007     {0xffffffff, NFlag, 0x00000000, ZCFlag},
8008     {0xfffffffe, NFlag, 0xffffffff, NFlag}},
8009    {{0xfffffffe, NCFlag, 0xffffffff, NCFlag},
8010     {0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
8011     {0x80000000, NCFlag, 0x80000001, NCFlag},
8012     {0x7fffffff, CVFlag, 0x80000000, NCFlag},
8013     {0x7ffffffe, CFlag, 0x7fffffff, CFlag},
8014     {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
8015     {0x00000000, ZCFlag, 0x00000001, CFlag},
8016     {0xffffffff, NFlag, 0x00000000, ZCFlag}}
8017  };
8018
8019  for (size_t left = 0; left < input_count; left++) {
8020    for (size_t right = 0; right < input_count; right++) {
8021      const Expected & expected = expected_adcs_w[left][right];
8022      AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 0,
8023                     expected.carry0_result, expected.carry0_flags);
8024      AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 1,
8025                     expected.carry1_result, expected.carry1_flags);
8026    }
8027  }
8028
8029  for (size_t left = 0; left < input_count; left++) {
8030    for (size_t right = 0; right < input_count; right++) {
8031      const Expected & expected = expected_sbcs_w[left][right];
8032      AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 0,
8033                     expected.carry0_result, expected.carry0_flags);
8034      AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 1,
8035                     expected.carry1_result, expected.carry1_flags);
8036    }
8037  }
8038}
8039
8040
8041TEST(adc_sbc_shift) {
8042  SETUP();
8043
8044  START();
8045  __ Mov(x0, 0);
8046  __ Mov(x1, 1);
8047  __ Mov(x2, 0x0123456789abcdef);
8048  __ Mov(x3, 0xfedcba9876543210);
8049  __ Mov(x4, 0xffffffffffffffff);
8050
8051  // Clear the C flag.
8052  __ Adds(x0, x0, Operand(0));
8053
8054  __ Adc(x5, x2, Operand(x3));
8055  __ Adc(x6, x0, Operand(x1, LSL, 60));
8056  __ Sbc(x7, x4, Operand(x3, LSR, 4));
8057  __ Adc(x8, x2, Operand(x3, ASR, 4));
8058  __ Adc(x9, x2, Operand(x3, ROR, 8));
8059
8060  __ Adc(w10, w2, Operand(w3));
8061  __ Adc(w11, w0, Operand(w1, LSL, 30));
8062  __ Sbc(w12, w4, Operand(w3, LSR, 4));
8063  __ Adc(w13, w2, Operand(w3, ASR, 4));
8064  __ Adc(w14, w2, Operand(w3, ROR, 8));
8065
8066  // Set the C flag.
8067  __ Cmp(w0, Operand(w0));
8068
8069  __ Adc(x18, x2, Operand(x3));
8070  __ Adc(x19, x0, Operand(x1, LSL, 60));
8071  __ Sbc(x20, x4, Operand(x3, LSR, 4));
8072  __ Adc(x21, x2, Operand(x3, ASR, 4));
8073  __ Adc(x22, x2, Operand(x3, ROR, 8));
8074
8075  __ Adc(w23, w2, Operand(w3));
8076  __ Adc(w24, w0, Operand(w1, LSL, 30));
8077  __ Sbc(w25, w4, Operand(w3, LSR, 4));
8078  __ Adc(w26, w2, Operand(w3, ASR, 4));
8079  __ Adc(w27, w2, Operand(w3, ROR, 8));
8080  END();
8081
8082  RUN();
8083
8084  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
8085  ASSERT_EQUAL_64(INT64_C(1) << 60, x6);
8086  ASSERT_EQUAL_64(0xf0123456789abcdd, x7);
8087  ASSERT_EQUAL_64(0x0111111111111110, x8);
8088  ASSERT_EQUAL_64(0x1222222222222221, x9);
8089
8090  ASSERT_EQUAL_32(0xffffffff, w10);
8091  ASSERT_EQUAL_32(INT32_C(1) << 30, w11);
8092  ASSERT_EQUAL_32(0xf89abcdd, w12);
8093  ASSERT_EQUAL_32(0x91111110, w13);
8094  ASSERT_EQUAL_32(0x9a222221, w14);
8095
8096  ASSERT_EQUAL_64(0xffffffffffffffff + 1, x18);
8097  ASSERT_EQUAL_64((INT64_C(1) << 60) + 1, x19);
8098  ASSERT_EQUAL_64(0xf0123456789abcdd + 1, x20);
8099  ASSERT_EQUAL_64(0x0111111111111110 + 1, x21);
8100  ASSERT_EQUAL_64(0x1222222222222221 + 1, x22);
8101
8102  ASSERT_EQUAL_32(0xffffffff + 1, w23);
8103  ASSERT_EQUAL_32((INT32_C(1) << 30) + 1, w24);
8104  ASSERT_EQUAL_32(0xf89abcdd + 1, w25);
8105  ASSERT_EQUAL_32(0x91111110 + 1, w26);
8106  ASSERT_EQUAL_32(0x9a222221 + 1, w27);
8107
8108  TEARDOWN();
8109}
8110
8111
8112TEST(adc_sbc_extend) {
8113  SETUP();
8114
8115  START();
8116  // Clear the C flag.
8117  __ Adds(x0, x0, Operand(0));
8118
8119  __ Mov(x0, 0);
8120  __ Mov(x1, 1);
8121  __ Mov(x2, 0x0123456789abcdef);
8122
8123  __ Adc(x10, x1, Operand(w2, UXTB, 1));
8124  __ Adc(x11, x1, Operand(x2, SXTH, 2));
8125  __ Sbc(x12, x1, Operand(w2, UXTW, 4));
8126  __ Adc(x13, x1, Operand(x2, UXTX, 4));
8127
8128  __ Adc(w14, w1, Operand(w2, UXTB, 1));
8129  __ Adc(w15, w1, Operand(w2, SXTH, 2));
8130  __ Adc(w9, w1, Operand(w2, UXTW, 4));
8131
8132  // Set the C flag.
8133  __ Cmp(w0, Operand(w0));
8134
8135  __ Adc(x20, x1, Operand(w2, UXTB, 1));
8136  __ Adc(x21, x1, Operand(x2, SXTH, 2));
8137  __ Sbc(x22, x1, Operand(w2, UXTW, 4));
8138  __ Adc(x23, x1, Operand(x2, UXTX, 4));
8139
8140  __ Adc(w24, w1, Operand(w2, UXTB, 1));
8141  __ Adc(w25, w1, Operand(w2, SXTH, 2));
8142  __ Adc(w26, w1, Operand(w2, UXTW, 4));
8143  END();
8144
8145  RUN();
8146
8147  ASSERT_EQUAL_64(0x1df, x10);
8148  ASSERT_EQUAL_64(0xffffffffffff37bd, x11);
8149  ASSERT_EQUAL_64(0xfffffff765432110, x12);
8150  ASSERT_EQUAL_64(0x123456789abcdef1, x13);
8151
8152  ASSERT_EQUAL_32(0x1df, w14);
8153  ASSERT_EQUAL_32(0xffff37bd, w15);
8154  ASSERT_EQUAL_32(0x9abcdef1, w9);
8155
8156  ASSERT_EQUAL_64(0x1df + 1, x20);
8157  ASSERT_EQUAL_64(0xffffffffffff37bd + 1, x21);
8158  ASSERT_EQUAL_64(0xfffffff765432110 + 1, x22);
8159  ASSERT_EQUAL_64(0x123456789abcdef1 + 1, x23);
8160
8161  ASSERT_EQUAL_32(0x1df + 1, w24);
8162  ASSERT_EQUAL_32(0xffff37bd + 1, w25);
8163  ASSERT_EQUAL_32(0x9abcdef1 + 1, w26);
8164
8165  // Check that adc correctly sets the condition flags.
8166  START();
8167  __ Mov(x0, 0xff);
8168  __ Mov(x1, 0xffffffffffffffff);
8169  // Clear the C flag.
8170  __ Adds(x0, x0, Operand(0));
8171  __ Adcs(x10, x0, Operand(x1, SXTX, 1));
8172  END();
8173
8174  RUN();
8175
8176  ASSERT_EQUAL_NZCV(CFlag);
8177
8178  START();
8179  __ Mov(x0, 0x7fffffffffffffff);
8180  __ Mov(x1, 1);
8181  // Clear the C flag.
8182  __ Adds(x0, x0, Operand(0));
8183  __ Adcs(x10, x0, Operand(x1, UXTB, 2));
8184  END();
8185
8186  RUN();
8187
8188  ASSERT_EQUAL_NZCV(NVFlag);
8189
8190  START();
8191  __ Mov(x0, 0x7fffffffffffffff);
8192  // Clear the C flag.
8193  __ Adds(x0, x0, Operand(0));
8194  __ Adcs(x10, x0, Operand(1));
8195  END();
8196
8197  RUN();
8198
8199  ASSERT_EQUAL_NZCV(NVFlag);
8200
8201  TEARDOWN();
8202}
8203
8204
8205TEST(adc_sbc_wide_imm) {
8206  SETUP();
8207
8208  START();
8209  __ Mov(x0, 0);
8210
8211  // Clear the C flag.
8212  __ Adds(x0, x0, Operand(0));
8213
8214  __ Adc(x7, x0, Operand(0x1234567890abcdef));
8215  __ Adc(w8, w0, Operand(0xffffffff));
8216  __ Sbc(x9, x0, Operand(0x1234567890abcdef));
8217  __ Sbc(w10, w0, Operand(0xffffffff));
8218  __ Ngc(x11, Operand(0xffffffff00000000));
8219  __ Ngc(w12, Operand(0xffff0000));
8220
8221  // Set the C flag.
8222  __ Cmp(w0, Operand(w0));
8223
8224  __ Adc(x18, x0, Operand(0x1234567890abcdef));
8225  __ Adc(w19, w0, Operand(0xffffffff));
8226  __ Sbc(x20, x0, Operand(0x1234567890abcdef));
8227  __ Sbc(w21, w0, Operand(0xffffffff));
8228  __ Ngc(x22, Operand(0xffffffff00000000));
8229  __ Ngc(w23, Operand(0xffff0000));
8230  END();
8231
8232  RUN();
8233
8234  ASSERT_EQUAL_64(0x1234567890abcdef, x7);
8235  ASSERT_EQUAL_64(0xffffffff, x8);
8236  ASSERT_EQUAL_64(0xedcba9876f543210, x9);
8237  ASSERT_EQUAL_64(0, x10);
8238  ASSERT_EQUAL_64(0xffffffff, x11);
8239  ASSERT_EQUAL_64(0xffff, x12);
8240
8241  ASSERT_EQUAL_64(0x1234567890abcdef + 1, x18);
8242  ASSERT_EQUAL_64(0, x19);
8243  ASSERT_EQUAL_64(0xedcba9876f543211, x20);
8244  ASSERT_EQUAL_64(1, x21);
8245  ASSERT_EQUAL_64(0x0000000100000000, x22);
8246  ASSERT_EQUAL_64(0x0000000000010000, x23);
8247
8248  TEARDOWN();
8249}
8250
8251TEST(flags) {
8252  SETUP();
8253
8254  START();
8255  __ Mov(x0, 0);
8256  __ Mov(x1, 0x1111111111111111);
8257  __ Neg(x10, Operand(x0));
8258  __ Neg(x11, Operand(x1));
8259  __ Neg(w12, Operand(w1));
8260  // Clear the C flag.
8261  __ Adds(x0, x0, Operand(0));
8262  __ Ngc(x13, Operand(x0));
8263  // Set the C flag.
8264  __ Cmp(x0, Operand(x0));
8265  __ Ngc(w14, Operand(w0));
8266  END();
8267
8268  RUN();
8269
8270  ASSERT_EQUAL_64(0, x10);
8271  ASSERT_EQUAL_64(-0x1111111111111111, x11);
8272  ASSERT_EQUAL_32(-0x11111111, w12);
8273  ASSERT_EQUAL_64(-1, x13);
8274  ASSERT_EQUAL_32(0, w14);
8275
8276  START();
8277  __ Mov(x0, 0);
8278  __ Cmp(x0, Operand(x0));
8279  END();
8280
8281  RUN();
8282
8283  ASSERT_EQUAL_NZCV(ZCFlag);
8284
8285  START();
8286  __ Mov(w0, 0);
8287  __ Cmp(w0, Operand(w0));
8288  END();
8289
8290  RUN();
8291
8292  ASSERT_EQUAL_NZCV(ZCFlag);
8293
8294  START();
8295  __ Mov(x0, 0);
8296  __ Mov(x1, 0x1111111111111111);
8297  __ Cmp(x0, Operand(x1));
8298  END();
8299
8300  RUN();
8301
8302  ASSERT_EQUAL_NZCV(NFlag);
8303
8304  START();
8305  __ Mov(w0, 0);
8306  __ Mov(w1, 0x11111111);
8307  __ Cmp(w0, Operand(w1));
8308  END();
8309
8310  RUN();
8311
8312  ASSERT_EQUAL_NZCV(NFlag);
8313
8314  START();
8315  __ Mov(x1, 0x1111111111111111);
8316  __ Cmp(x1, Operand(0));
8317  END();
8318
8319  RUN();
8320
8321  ASSERT_EQUAL_NZCV(CFlag);
8322
8323  START();
8324  __ Mov(w1, 0x11111111);
8325  __ Cmp(w1, Operand(0));
8326  END();
8327
8328  RUN();
8329
8330  ASSERT_EQUAL_NZCV(CFlag);
8331
8332  START();
8333  __ Mov(x0, 1);
8334  __ Mov(x1, 0x7fffffffffffffff);
8335  __ Cmn(x1, Operand(x0));
8336  END();
8337
8338  RUN();
8339
8340  ASSERT_EQUAL_NZCV(NVFlag);
8341
8342  START();
8343  __ Mov(w0, 1);
8344  __ Mov(w1, 0x7fffffff);
8345  __ Cmn(w1, Operand(w0));
8346  END();
8347
8348  RUN();
8349
8350  ASSERT_EQUAL_NZCV(NVFlag);
8351
8352  START();
8353  __ Mov(x0, 1);
8354  __ Mov(x1, 0xffffffffffffffff);
8355  __ Cmn(x1, Operand(x0));
8356  END();
8357
8358  RUN();
8359
8360  ASSERT_EQUAL_NZCV(ZCFlag);
8361
8362  START();
8363  __ Mov(w0, 1);
8364  __ Mov(w1, 0xffffffff);
8365  __ Cmn(w1, Operand(w0));
8366  END();
8367
8368  RUN();
8369
8370  ASSERT_EQUAL_NZCV(ZCFlag);
8371
8372  START();
8373  __ Mov(w0, 0);
8374  __ Mov(w1, 1);
8375  // Clear the C flag.
8376  __ Adds(w0, w0, Operand(0));
8377  __ Ngcs(w0, Operand(w1));
8378  END();
8379
8380  RUN();
8381
8382  ASSERT_EQUAL_NZCV(NFlag);
8383
8384  START();
8385  __ Mov(w0, 0);
8386  __ Mov(w1, 0);
8387  // Set the C flag.
8388  __ Cmp(w0, Operand(w0));
8389  __ Ngcs(w0, Operand(w1));
8390  END();
8391
8392  RUN();
8393
8394  ASSERT_EQUAL_NZCV(ZCFlag);
8395
8396  TEARDOWN();
8397}
8398
8399
8400TEST(cmp_shift) {
8401  SETUP();
8402
8403  START();
8404  __ Mov(x18, 0xf0000000);
8405  __ Mov(x19, 0xf000000010000000);
8406  __ Mov(x20, 0xf0000000f0000000);
8407  __ Mov(x21, 0x7800000078000000);
8408  __ Mov(x22, 0x3c0000003c000000);
8409  __ Mov(x23, 0x8000000780000000);
8410  __ Mov(x24, 0x0000000f00000000);
8411  __ Mov(x25, 0x00000003c0000000);
8412  __ Mov(x26, 0x8000000780000000);
8413  __ Mov(x27, 0xc0000003);
8414
8415  __ Cmp(w20, Operand(w21, LSL, 1));
8416  __ Mrs(x0, NZCV);
8417
8418  __ Cmp(x20, Operand(x22, LSL, 2));
8419  __ Mrs(x1, NZCV);
8420
8421  __ Cmp(w19, Operand(w23, LSR, 3));
8422  __ Mrs(x2, NZCV);
8423
8424  __ Cmp(x18, Operand(x24, LSR, 4));
8425  __ Mrs(x3, NZCV);
8426
8427  __ Cmp(w20, Operand(w25, ASR, 2));
8428  __ Mrs(x4, NZCV);
8429
8430  __ Cmp(x20, Operand(x26, ASR, 3));
8431  __ Mrs(x5, NZCV);
8432
8433  __ Cmp(w27, Operand(w22, ROR, 28));
8434  __ Mrs(x6, NZCV);
8435
8436  __ Cmp(x20, Operand(x21, ROR, 31));
8437  __ Mrs(x7, NZCV);
8438  END();
8439
8440  RUN();
8441
8442  ASSERT_EQUAL_32(ZCFlag, w0);
8443  ASSERT_EQUAL_32(ZCFlag, w1);
8444  ASSERT_EQUAL_32(ZCFlag, w2);
8445  ASSERT_EQUAL_32(ZCFlag, w3);
8446  ASSERT_EQUAL_32(ZCFlag, w4);
8447  ASSERT_EQUAL_32(ZCFlag, w5);
8448  ASSERT_EQUAL_32(ZCFlag, w6);
8449  ASSERT_EQUAL_32(ZCFlag, w7);
8450
8451  TEARDOWN();
8452}
8453
8454
8455TEST(cmp_extend) {
8456  SETUP();
8457
8458  START();
8459  __ Mov(w20, 0x2);
8460  __ Mov(w21, 0x1);
8461  __ Mov(x22, 0xffffffffffffffff);
8462  __ Mov(x23, 0xff);
8463  __ Mov(x24, 0xfffffffffffffffe);
8464  __ Mov(x25, 0xffff);
8465  __ Mov(x26, 0xffffffff);
8466
8467  __ Cmp(w20, Operand(w21, LSL, 1));
8468  __ Mrs(x0, NZCV);
8469
8470  __ Cmp(x22, Operand(x23, SXTB, 0));
8471  __ Mrs(x1, NZCV);
8472
8473  __ Cmp(x24, Operand(x23, SXTB, 1));
8474  __ Mrs(x2, NZCV);
8475
8476  __ Cmp(x24, Operand(x23, UXTB, 1));
8477  __ Mrs(x3, NZCV);
8478
8479  __ Cmp(w22, Operand(w25, UXTH));
8480  __ Mrs(x4, NZCV);
8481
8482  __ Cmp(x22, Operand(x25, SXTH));
8483  __ Mrs(x5, NZCV);
8484
8485  __ Cmp(x22, Operand(x26, UXTW));
8486  __ Mrs(x6, NZCV);
8487
8488  __ Cmp(x24, Operand(x26, SXTW, 1));
8489  __ Mrs(x7, NZCV);
8490  END();
8491
8492  RUN();
8493
8494  ASSERT_EQUAL_32(ZCFlag, w0);
8495  ASSERT_EQUAL_32(ZCFlag, w1);
8496  ASSERT_EQUAL_32(ZCFlag, w2);
8497  ASSERT_EQUAL_32(NCFlag, w3);
8498  ASSERT_EQUAL_32(NCFlag, w4);
8499  ASSERT_EQUAL_32(ZCFlag, w5);
8500  ASSERT_EQUAL_32(NCFlag, w6);
8501  ASSERT_EQUAL_32(ZCFlag, w7);
8502
8503  TEARDOWN();
8504}
8505
8506
8507TEST(ccmp) {
8508  SETUP();
8509  ALLOW_ASM();
8510
8511  START();
8512  __ Mov(w16, 0);
8513  __ Mov(w17, 1);
8514  __ Cmp(w16, w16);
8515  __ Ccmp(w16, w17, NCFlag, eq);
8516  __ Mrs(x0, NZCV);
8517
8518  __ Cmp(w16, w16);
8519  __ Ccmp(w16, w17, NCFlag, ne);
8520  __ Mrs(x1, NZCV);
8521
8522  __ Cmp(x16, x16);
8523  __ Ccmn(x16, 2, NZCVFlag, eq);
8524  __ Mrs(x2, NZCV);
8525
8526  __ Cmp(x16, x16);
8527  __ Ccmn(x16, 2, NZCVFlag, ne);
8528  __ Mrs(x3, NZCV);
8529
8530  // The MacroAssembler does not allow al as a condition.
8531  __ ccmp(x16, x16, NZCVFlag, al);
8532  __ Mrs(x4, NZCV);
8533
8534  // The MacroAssembler does not allow nv as a condition.
8535  __ ccmp(x16, x16, NZCVFlag, nv);
8536  __ Mrs(x5, NZCV);
8537
8538  END();
8539
8540  RUN();
8541
8542  ASSERT_EQUAL_32(NFlag, w0);
8543  ASSERT_EQUAL_32(NCFlag, w1);
8544  ASSERT_EQUAL_32(NoFlag, w2);
8545  ASSERT_EQUAL_32(NZCVFlag, w3);
8546  ASSERT_EQUAL_32(ZCFlag, w4);
8547  ASSERT_EQUAL_32(ZCFlag, w5);
8548
8549  TEARDOWN();
8550}
8551
8552
8553TEST(ccmp_wide_imm) {
8554  SETUP();
8555
8556  START();
8557  __ Mov(w20, 0);
8558
8559  __ Cmp(w20, Operand(w20));
8560  __ Ccmp(w20, Operand(0x12345678), NZCVFlag, eq);
8561  __ Mrs(x0, NZCV);
8562
8563  __ Cmp(w20, Operand(w20));
8564  __ Ccmp(x20, Operand(0xffffffffffffffff), NZCVFlag, eq);
8565  __ Mrs(x1, NZCV);
8566  END();
8567
8568  RUN();
8569
8570  ASSERT_EQUAL_32(NFlag, w0);
8571  ASSERT_EQUAL_32(NoFlag, w1);
8572
8573  TEARDOWN();
8574}
8575
8576
8577TEST(ccmp_shift_extend) {
8578  SETUP();
8579
8580  START();
8581  __ Mov(w20, 0x2);
8582  __ Mov(w21, 0x1);
8583  __ Mov(x22, 0xffffffffffffffff);
8584  __ Mov(x23, 0xff);
8585  __ Mov(x24, 0xfffffffffffffffe);
8586
8587  __ Cmp(w20, Operand(w20));
8588  __ Ccmp(w20, Operand(w21, LSL, 1), NZCVFlag, eq);
8589  __ Mrs(x0, NZCV);
8590
8591  __ Cmp(w20, Operand(w20));
8592  __ Ccmp(x22, Operand(x23, SXTB, 0), NZCVFlag, eq);
8593  __ Mrs(x1, NZCV);
8594
8595  __ Cmp(w20, Operand(w20));
8596  __ Ccmp(x24, Operand(x23, SXTB, 1), NZCVFlag, eq);
8597  __ Mrs(x2, NZCV);
8598
8599  __ Cmp(w20, Operand(w20));
8600  __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, eq);
8601  __ Mrs(x3, NZCV);
8602
8603  __ Cmp(w20, Operand(w20));
8604  __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, ne);
8605  __ Mrs(x4, NZCV);
8606  END();
8607
8608  RUN();
8609
8610  ASSERT_EQUAL_32(ZCFlag, w0);
8611  ASSERT_EQUAL_32(ZCFlag, w1);
8612  ASSERT_EQUAL_32(ZCFlag, w2);
8613  ASSERT_EQUAL_32(NCFlag, w3);
8614  ASSERT_EQUAL_32(NZCVFlag, w4);
8615
8616  TEARDOWN();
8617}
8618
8619
8620TEST(csel) {
8621  SETUP();
8622  ALLOW_ASM();
8623
8624  START();
8625  __ Mov(x16, 0);
8626  __ Mov(x24, 0x0000000f0000000f);
8627  __ Mov(x25, 0x0000001f0000001f);
8628
8629  __ Cmp(w16, Operand(0));
8630  __ Csel(w0, w24, w25, eq);
8631  __ Csel(w1, w24, w25, ne);
8632  __ Csinc(w2, w24, w25, mi);
8633  __ Csinc(w3, w24, w25, pl);
8634
8635  // The MacroAssembler does not allow al or nv as a condition.
8636  __ csel(w13, w24, w25, al);
8637  __ csel(x14, x24, x25, nv);
8638
8639  __ Cmp(x16, Operand(1));
8640  __ Csinv(x4, x24, x25, gt);
8641  __ Csinv(x5, x24, x25, le);
8642  __ Csneg(x6, x24, x25, hs);
8643  __ Csneg(x7, x24, x25, lo);
8644
8645  __ Cset(w8, ne);
8646  __ Csetm(w9, ne);
8647  __ Cinc(x10, x25, ne);
8648  __ Cinv(x11, x24, ne);
8649  __ Cneg(x12, x24, ne);
8650
8651  // The MacroAssembler does not allow al or nv as a condition.
8652  __ csel(w15, w24, w25, al);
8653  __ csel(x17, x24, x25, nv);
8654
8655  END();
8656
8657  RUN();
8658
8659  ASSERT_EQUAL_64(0x0000000f, x0);
8660  ASSERT_EQUAL_64(0x0000001f, x1);
8661  ASSERT_EQUAL_64(0x00000020, x2);
8662  ASSERT_EQUAL_64(0x0000000f, x3);
8663  ASSERT_EQUAL_64(0xffffffe0ffffffe0, x4);
8664  ASSERT_EQUAL_64(0x0000000f0000000f, x5);
8665  ASSERT_EQUAL_64(0xffffffe0ffffffe1, x6);
8666  ASSERT_EQUAL_64(0x0000000f0000000f, x7);
8667  ASSERT_EQUAL_64(0x00000001, x8);
8668  ASSERT_EQUAL_64(0xffffffff, x9);
8669  ASSERT_EQUAL_64(0x0000001f00000020, x10);
8670  ASSERT_EQUAL_64(0xfffffff0fffffff0, x11);
8671  ASSERT_EQUAL_64(0xfffffff0fffffff1, x12);
8672  ASSERT_EQUAL_64(0x0000000f, x13);
8673  ASSERT_EQUAL_64(0x0000000f0000000f, x14);
8674  ASSERT_EQUAL_64(0x0000000f, x15);
8675  ASSERT_EQUAL_64(0x0000000f0000000f, x17);
8676
8677  TEARDOWN();
8678}
8679
8680
8681TEST(csel_imm) {
8682  SETUP();
8683
8684  START();
8685  __ Mov(x18, 0);
8686  __ Mov(x19, 0x80000000);
8687  __ Mov(x20, 0x8000000000000000);
8688
8689  __ Cmp(x18, Operand(0));
8690  __ Csel(w0, w19, -2, ne);
8691  __ Csel(w1, w19, -1, ne);
8692  __ Csel(w2, w19, 0, ne);
8693  __ Csel(w3, w19, 1, ne);
8694  __ Csel(w4, w19, 2, ne);
8695  __ Csel(w5, w19, Operand(w19, ASR, 31), ne);
8696  __ Csel(w6, w19, Operand(w19, ROR, 1), ne);
8697  __ Csel(w7, w19, 3, eq);
8698
8699  __ Csel(x8, x20, -2, ne);
8700  __ Csel(x9, x20, -1, ne);
8701  __ Csel(x10, x20, 0, ne);
8702  __ Csel(x11, x20, 1, ne);
8703  __ Csel(x12, x20, 2, ne);
8704  __ Csel(x13, x20, Operand(x20, ASR, 63), ne);
8705  __ Csel(x14, x20, Operand(x20, ROR, 1), ne);
8706  __ Csel(x15, x20, 3, eq);
8707
8708  END();
8709
8710  RUN();
8711
8712  ASSERT_EQUAL_32(-2, w0);
8713  ASSERT_EQUAL_32(-1, w1);
8714  ASSERT_EQUAL_32(0, w2);
8715  ASSERT_EQUAL_32(1, w3);
8716  ASSERT_EQUAL_32(2, w4);
8717  ASSERT_EQUAL_32(-1, w5);
8718  ASSERT_EQUAL_32(0x40000000, w6);
8719  ASSERT_EQUAL_32(0x80000000, w7);
8720
8721  ASSERT_EQUAL_64(-2, x8);
8722  ASSERT_EQUAL_64(-1, x9);
8723  ASSERT_EQUAL_64(0, x10);
8724  ASSERT_EQUAL_64(1, x11);
8725  ASSERT_EQUAL_64(2, x12);
8726  ASSERT_EQUAL_64(-1, x13);
8727  ASSERT_EQUAL_64(0x4000000000000000, x14);
8728  ASSERT_EQUAL_64(0x8000000000000000, x15);
8729
8730  TEARDOWN();
8731}
8732
8733
8734TEST(lslv) {
8735  SETUP();
8736  ALLOW_ASM();
8737
8738  uint64_t value = 0x0123456789abcdef;
8739  int shift[] = {1, 3, 5, 9, 17, 33};
8740
8741  START();
8742  __ Mov(x0, value);
8743  __ Mov(w1, shift[0]);
8744  __ Mov(w2, shift[1]);
8745  __ Mov(w3, shift[2]);
8746  __ Mov(w4, shift[3]);
8747  __ Mov(w5, shift[4]);
8748  __ Mov(w6, shift[5]);
8749
8750  // The MacroAssembler does not allow zr as an argument.
8751  __ lslv(x0, x0, xzr);
8752
8753  __ Lsl(x16, x0, x1);
8754  __ Lsl(x17, x0, x2);
8755  __ Lsl(x18, x0, x3);
8756  __ Lsl(x19, x0, x4);
8757  __ Lsl(x20, x0, x5);
8758  __ Lsl(x21, x0, x6);
8759
8760  __ Lsl(w22, w0, w1);
8761  __ Lsl(w23, w0, w2);
8762  __ Lsl(w24, w0, w3);
8763  __ Lsl(w25, w0, w4);
8764  __ Lsl(w26, w0, w5);
8765  __ Lsl(w27, w0, w6);
8766  END();
8767
8768  RUN();
8769
8770  ASSERT_EQUAL_64(value, x0);
8771  ASSERT_EQUAL_64(value << (shift[0] & 63), x16);
8772  ASSERT_EQUAL_64(value << (shift[1] & 63), x17);
8773  ASSERT_EQUAL_64(value << (shift[2] & 63), x18);
8774  ASSERT_EQUAL_64(value << (shift[3] & 63), x19);
8775  ASSERT_EQUAL_64(value << (shift[4] & 63), x20);
8776  ASSERT_EQUAL_64(value << (shift[5] & 63), x21);
8777  ASSERT_EQUAL_32(value << (shift[0] & 31), w22);
8778  ASSERT_EQUAL_32(value << (shift[1] & 31), w23);
8779  ASSERT_EQUAL_32(value << (shift[2] & 31), w24);
8780  ASSERT_EQUAL_32(value << (shift[3] & 31), w25);
8781  ASSERT_EQUAL_32(value << (shift[4] & 31), w26);
8782  ASSERT_EQUAL_32(value << (shift[5] & 31), w27);
8783
8784  TEARDOWN();
8785}
8786
8787
8788TEST(lsrv) {
8789  SETUP();
8790  ALLOW_ASM();
8791
8792  uint64_t value = 0x0123456789abcdef;
8793  int shift[] = {1, 3, 5, 9, 17, 33};
8794
8795  START();
8796  __ Mov(x0, value);
8797  __ Mov(w1, shift[0]);
8798  __ Mov(w2, shift[1]);
8799  __ Mov(w3, shift[2]);
8800  __ Mov(w4, shift[3]);
8801  __ Mov(w5, shift[4]);
8802  __ Mov(w6, shift[5]);
8803
8804  // The MacroAssembler does not allow zr as an argument.
8805  __ lsrv(x0, x0, xzr);
8806
8807  __ Lsr(x16, x0, x1);
8808  __ Lsr(x17, x0, x2);
8809  __ Lsr(x18, x0, x3);
8810  __ Lsr(x19, x0, x4);
8811  __ Lsr(x20, x0, x5);
8812  __ Lsr(x21, x0, x6);
8813
8814  __ Lsr(w22, w0, w1);
8815  __ Lsr(w23, w0, w2);
8816  __ Lsr(w24, w0, w3);
8817  __ Lsr(w25, w0, w4);
8818  __ Lsr(w26, w0, w5);
8819  __ Lsr(w27, w0, w6);
8820  END();
8821
8822  RUN();
8823
8824  ASSERT_EQUAL_64(value, x0);
8825  ASSERT_EQUAL_64(value >> (shift[0] & 63), x16);
8826  ASSERT_EQUAL_64(value >> (shift[1] & 63), x17);
8827  ASSERT_EQUAL_64(value >> (shift[2] & 63), x18);
8828  ASSERT_EQUAL_64(value >> (shift[3] & 63), x19);
8829  ASSERT_EQUAL_64(value >> (shift[4] & 63), x20);
8830  ASSERT_EQUAL_64(value >> (shift[5] & 63), x21);
8831
8832  value &= 0xffffffff;
8833  ASSERT_EQUAL_32(value >> (shift[0] & 31), w22);
8834  ASSERT_EQUAL_32(value >> (shift[1] & 31), w23);
8835  ASSERT_EQUAL_32(value >> (shift[2] & 31), w24);
8836  ASSERT_EQUAL_32(value >> (shift[3] & 31), w25);
8837  ASSERT_EQUAL_32(value >> (shift[4] & 31), w26);
8838  ASSERT_EQUAL_32(value >> (shift[5] & 31), w27);
8839
8840  TEARDOWN();
8841}
8842
8843
8844TEST(asrv) {
8845  SETUP();
8846  ALLOW_ASM();
8847
8848  int64_t value = 0xfedcba98fedcba98;
8849  int shift[] = {1, 3, 5, 9, 17, 33};
8850
8851  START();
8852  __ Mov(x0, value);
8853  __ Mov(w1, shift[0]);
8854  __ Mov(w2, shift[1]);
8855  __ Mov(w3, shift[2]);
8856  __ Mov(w4, shift[3]);
8857  __ Mov(w5, shift[4]);
8858  __ Mov(w6, shift[5]);
8859
8860  // The MacroAssembler does not allow zr as an argument.
8861  __ asrv(x0, x0, xzr);
8862
8863  __ Asr(x16, x0, x1);
8864  __ Asr(x17, x0, x2);
8865  __ Asr(x18, x0, x3);
8866  __ Asr(x19, x0, x4);
8867  __ Asr(x20, x0, x5);
8868  __ Asr(x21, x0, x6);
8869
8870  __ Asr(w22, w0, w1);
8871  __ Asr(w23, w0, w2);
8872  __ Asr(w24, w0, w3);
8873  __ Asr(w25, w0, w4);
8874  __ Asr(w26, w0, w5);
8875  __ Asr(w27, w0, w6);
8876  END();
8877
8878  RUN();
8879
8880  ASSERT_EQUAL_64(value, x0);
8881  ASSERT_EQUAL_64(value >> (shift[0] & 63), x16);
8882  ASSERT_EQUAL_64(value >> (shift[1] & 63), x17);
8883  ASSERT_EQUAL_64(value >> (shift[2] & 63), x18);
8884  ASSERT_EQUAL_64(value >> (shift[3] & 63), x19);
8885  ASSERT_EQUAL_64(value >> (shift[4] & 63), x20);
8886  ASSERT_EQUAL_64(value >> (shift[5] & 63), x21);
8887
8888  int32_t value32 = static_cast<int32_t>(value & 0xffffffff);
8889  ASSERT_EQUAL_32(value32 >> (shift[0] & 31), w22);
8890  ASSERT_EQUAL_32(value32 >> (shift[1] & 31), w23);
8891  ASSERT_EQUAL_32(value32 >> (shift[2] & 31), w24);
8892  ASSERT_EQUAL_32(value32 >> (shift[3] & 31), w25);
8893  ASSERT_EQUAL_32(value32 >> (shift[4] & 31), w26);
8894  ASSERT_EQUAL_32(value32 >> (shift[5] & 31), w27);
8895
8896  TEARDOWN();
8897}
8898
8899
8900TEST(rorv) {
8901  SETUP();
8902  ALLOW_ASM();
8903
8904  uint64_t value = 0x0123456789abcdef;
8905  int shift[] = {4, 8, 12, 16, 24, 36};
8906
8907  START();
8908  __ Mov(x0, value);
8909  __ Mov(w1, shift[0]);
8910  __ Mov(w2, shift[1]);
8911  __ Mov(w3, shift[2]);
8912  __ Mov(w4, shift[3]);
8913  __ Mov(w5, shift[4]);
8914  __ Mov(w6, shift[5]);
8915
8916  // The MacroAssembler does not allow zr as an argument.
8917  __ rorv(x0, x0, xzr);
8918
8919  __ Ror(x16, x0, x1);
8920  __ Ror(x17, x0, x2);
8921  __ Ror(x18, x0, x3);
8922  __ Ror(x19, x0, x4);
8923  __ Ror(x20, x0, x5);
8924  __ Ror(x21, x0, x6);
8925
8926  __ Ror(w22, w0, w1);
8927  __ Ror(w23, w0, w2);
8928  __ Ror(w24, w0, w3);
8929  __ Ror(w25, w0, w4);
8930  __ Ror(w26, w0, w5);
8931  __ Ror(w27, w0, w6);
8932  END();
8933
8934  RUN();
8935
8936  ASSERT_EQUAL_64(value, x0);
8937  ASSERT_EQUAL_64(0xf0123456789abcde, x16);
8938  ASSERT_EQUAL_64(0xef0123456789abcd, x17);
8939  ASSERT_EQUAL_64(0xdef0123456789abc, x18);
8940  ASSERT_EQUAL_64(0xcdef0123456789ab, x19);
8941  ASSERT_EQUAL_64(0xabcdef0123456789, x20);
8942  ASSERT_EQUAL_64(0x789abcdef0123456, x21);
8943  ASSERT_EQUAL_32(0xf89abcde, w22);
8944  ASSERT_EQUAL_32(0xef89abcd, w23);
8945  ASSERT_EQUAL_32(0xdef89abc, w24);
8946  ASSERT_EQUAL_32(0xcdef89ab, w25);
8947  ASSERT_EQUAL_32(0xabcdef89, w26);
8948  ASSERT_EQUAL_32(0xf89abcde, w27);
8949
8950  TEARDOWN();
8951}
8952
8953
8954TEST(bfm) {
8955  SETUP();
8956  ALLOW_ASM();
8957
8958  START();
8959  __ Mov(x1, 0x0123456789abcdef);
8960
8961  __ Mov(x10, 0x8888888888888888);
8962  __ Mov(x11, 0x8888888888888888);
8963  __ Mov(x12, 0x8888888888888888);
8964  __ Mov(x13, 0x8888888888888888);
8965  __ Mov(w20, 0x88888888);
8966  __ Mov(w21, 0x88888888);
8967
8968  // There are no macro instruction for bfm.
8969  __ Bfm(x10, x1, 16, 31);
8970  __ Bfm(x11, x1, 32, 15);
8971
8972  __ Bfm(w20, w1, 16, 23);
8973  __ Bfm(w21, w1, 24, 15);
8974
8975  // Aliases.
8976  __ Bfi(x12, x1, 16, 8);
8977  __ Bfxil(x13, x1, 16, 8);
8978  END();
8979
8980  RUN();
8981
8982
8983  ASSERT_EQUAL_64(0x88888888888889ab, x10);
8984  ASSERT_EQUAL_64(0x8888cdef88888888, x11);
8985
8986  ASSERT_EQUAL_32(0x888888ab, w20);
8987  ASSERT_EQUAL_32(0x88cdef88, w21);
8988
8989  ASSERT_EQUAL_64(0x8888888888ef8888, x12);
8990  ASSERT_EQUAL_64(0x88888888888888ab, x13);
8991
8992  TEARDOWN();
8993}
8994
8995
8996TEST(sbfm) {
8997  SETUP();
8998  ALLOW_ASM();
8999
9000  START();
9001  __ Mov(x1, 0x0123456789abcdef);
9002  __ Mov(x2, 0xfedcba9876543210);
9003
9004  // There are no macro instruction for sbfm.
9005  __ Sbfm(x10, x1, 16, 31);
9006  __ Sbfm(x11, x1, 32, 15);
9007  __ Sbfm(x12, x1, 32, 47);
9008  __ Sbfm(x13, x1, 48, 35);
9009
9010  __ Sbfm(w14, w1, 16, 23);
9011  __ Sbfm(w15, w1, 24, 15);
9012  __ Sbfm(w16, w2, 16, 23);
9013  __ Sbfm(w17, w2, 24, 15);
9014
9015  // Aliases.
9016  __ Asr(x18, x1, 32);
9017  __ Asr(x19, x2, 32);
9018  __ Sbfiz(x20, x1, 8, 16);
9019  __ Sbfiz(x21, x2, 8, 16);
9020  __ Sbfx(x22, x1, 8, 16);
9021  __ Sbfx(x23, x2, 8, 16);
9022  __ Sxtb(x24, w1);
9023  __ Sxtb(x25, x2);
9024  __ Sxth(x26, w1);
9025  __ Sxth(x27, x2);
9026  __ Sxtw(x28, w1);
9027  __ Sxtw(x29, x2);
9028  END();
9029
9030  RUN();
9031
9032
9033  ASSERT_EQUAL_64(0xffffffffffff89ab, x10);
9034  ASSERT_EQUAL_64(0xffffcdef00000000, x11);
9035  ASSERT_EQUAL_64(0x0000000000004567, x12);
9036  ASSERT_EQUAL_64(0x000789abcdef0000, x13);
9037
9038  ASSERT_EQUAL_32(0xffffffab, w14);
9039  ASSERT_EQUAL_32(0xffcdef00, w15);
9040  ASSERT_EQUAL_32(0x00000054, w16);
9041  ASSERT_EQUAL_32(0x00321000, w17);
9042
9043  ASSERT_EQUAL_64(0x0000000001234567, x18);
9044  ASSERT_EQUAL_64(0xfffffffffedcba98, x19);
9045  ASSERT_EQUAL_64(0xffffffffffcdef00, x20);
9046  ASSERT_EQUAL_64(0x0000000000321000, x21);
9047  ASSERT_EQUAL_64(0xffffffffffffabcd, x22);
9048  ASSERT_EQUAL_64(0x0000000000005432, x23);
9049  ASSERT_EQUAL_64(0xffffffffffffffef, x24);
9050  ASSERT_EQUAL_64(0x0000000000000010, x25);
9051  ASSERT_EQUAL_64(0xffffffffffffcdef, x26);
9052  ASSERT_EQUAL_64(0x0000000000003210, x27);
9053  ASSERT_EQUAL_64(0xffffffff89abcdef, x28);
9054  ASSERT_EQUAL_64(0x0000000076543210, x29);
9055
9056  TEARDOWN();
9057}
9058
9059
9060TEST(ubfm) {
9061  SETUP();
9062  ALLOW_ASM();
9063
9064  START();
9065  __ Mov(x1, 0x0123456789abcdef);
9066  __ Mov(x2, 0xfedcba9876543210);
9067
9068  __ Mov(x10, 0x8888888888888888);
9069  __ Mov(x11, 0x8888888888888888);
9070
9071  // There are no macro instruction for ubfm.
9072  __ Ubfm(x10, x1, 16, 31);
9073  __ Ubfm(x11, x1, 32, 15);
9074  __ Ubfm(x12, x1, 32, 47);
9075  __ Ubfm(x13, x1, 48, 35);
9076
9077  __ Ubfm(w25, w1, 16, 23);
9078  __ Ubfm(w26, w1, 24, 15);
9079  __ Ubfm(w27, w2, 16, 23);
9080  __ Ubfm(w28, w2, 24, 15);
9081
9082  // Aliases
9083  __ Lsl(x15, x1, 63);
9084  __ Lsl(x16, x1, 0);
9085  __ Lsr(x17, x1, 32);
9086  __ Ubfiz(x18, x1, 8, 16);
9087  __ Ubfx(x19, x1, 8, 16);
9088  __ Uxtb(x20, x1);
9089  __ Uxth(x21, x1);
9090  __ Uxtw(x22, x1);
9091  END();
9092
9093  RUN();
9094
9095  ASSERT_EQUAL_64(0x00000000000089ab, x10);
9096  ASSERT_EQUAL_64(0x0000cdef00000000, x11);
9097  ASSERT_EQUAL_64(0x0000000000004567, x12);
9098  ASSERT_EQUAL_64(0x000789abcdef0000, x13);
9099
9100  ASSERT_EQUAL_32(0x000000ab, w25);
9101  ASSERT_EQUAL_32(0x00cdef00, w26);
9102  ASSERT_EQUAL_32(0x00000054, w27);
9103  ASSERT_EQUAL_32(0x00321000, w28);
9104
9105  ASSERT_EQUAL_64(0x8000000000000000, x15);
9106  ASSERT_EQUAL_64(0x0123456789abcdef, x16);
9107  ASSERT_EQUAL_64(0x0000000001234567, x17);
9108  ASSERT_EQUAL_64(0x0000000000cdef00, x18);
9109  ASSERT_EQUAL_64(0x000000000000abcd, x19);
9110  ASSERT_EQUAL_64(0x00000000000000ef, x20);
9111  ASSERT_EQUAL_64(0x000000000000cdef, x21);
9112  ASSERT_EQUAL_64(0x0000000089abcdef, x22);
9113
9114  TEARDOWN();
9115}
9116
9117
9118TEST(extr) {
9119  SETUP();
9120
9121  START();
9122  __ Mov(x1, 0x0123456789abcdef);
9123  __ Mov(x2, 0xfedcba9876543210);
9124
9125  __ Extr(w10, w1, w2, 0);
9126  __ Extr(w11, w1, w2, 1);
9127  __ Extr(x12, x2, x1, 2);
9128
9129  __ Ror(w13, w1, 0);
9130  __ Ror(w14, w2, 17);
9131  __ Ror(w15, w1, 31);
9132  __ Ror(x18, x2, 0);
9133  __ Ror(x19, x2, 1);
9134  __ Ror(x20, x1, 63);
9135  END();
9136
9137  RUN();
9138
9139  ASSERT_EQUAL_64(0x76543210, x10);
9140  ASSERT_EQUAL_64(0xbb2a1908, x11);
9141  ASSERT_EQUAL_64(0x0048d159e26af37b, x12);
9142  ASSERT_EQUAL_64(0x89abcdef, x13);
9143  ASSERT_EQUAL_64(0x19083b2a, x14);
9144  ASSERT_EQUAL_64(0x13579bdf, x15);
9145  ASSERT_EQUAL_64(0xfedcba9876543210, x18);
9146  ASSERT_EQUAL_64(0x7f6e5d4c3b2a1908, x19);
9147  ASSERT_EQUAL_64(0x02468acf13579bde, x20);
9148
9149  TEARDOWN();
9150}
9151
9152
9153TEST(fmov_imm) {
9154  SETUP();
9155
9156  START();
9157  __ Fmov(s11, 1.0);
9158  __ Fmov(d22, -13.0);
9159  __ Fmov(s1, 255.0);
9160  __ Fmov(d2, 12.34567);
9161  __ Fmov(s3, 0.0);
9162  __ Fmov(d4, 0.0);
9163  __ Fmov(s5, kFP32PositiveInfinity);
9164  __ Fmov(d6, kFP64NegativeInfinity);
9165  END();
9166
9167  RUN();
9168
9169  ASSERT_EQUAL_FP32(1.0, s11);
9170  ASSERT_EQUAL_FP64(-13.0, d22);
9171  ASSERT_EQUAL_FP32(255.0, s1);
9172  ASSERT_EQUAL_FP64(12.34567, d2);
9173  ASSERT_EQUAL_FP32(0.0, s3);
9174  ASSERT_EQUAL_FP64(0.0, d4);
9175  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
9176  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d6);
9177
9178  TEARDOWN();
9179}
9180
9181
9182TEST(fmov_reg) {
9183  SETUP();
9184
9185  START();
9186  __ Fmov(s20, 1.0);
9187  __ Fmov(w10, s20);
9188  __ Fmov(s30, w10);
9189  __ Fmov(s5, s20);
9190  __ Fmov(d1, -13.0);
9191  __ Fmov(x1, d1);
9192  __ Fmov(d2, x1);
9193  __ Fmov(d4, d1);
9194  __ Fmov(d6, rawbits_to_double(0x0123456789abcdef));
9195  __ Fmov(s6, s6);
9196
9197  __ Fmov(d0, 0.0);
9198  __ Fmov(v0.D(), 1, x1);
9199  __ Fmov(x2, v0.D(), 1);
9200
9201  END();
9202
9203  RUN();
9204
9205  ASSERT_EQUAL_32(float_to_rawbits(1.0), w10);
9206  ASSERT_EQUAL_FP32(1.0, s30);
9207  ASSERT_EQUAL_FP32(1.0, s5);
9208  ASSERT_EQUAL_64(double_to_rawbits(-13.0), x1);
9209  ASSERT_EQUAL_FP64(-13.0, d2);
9210  ASSERT_EQUAL_FP64(-13.0, d4);
9211  ASSERT_EQUAL_FP32(rawbits_to_float(0x89abcdef), s6);
9212  ASSERT_EQUAL_128(double_to_rawbits(-13.0), 0x0000000000000000, q0);
9213  ASSERT_EQUAL_64(double_to_rawbits(-13.0), x2);
9214  TEARDOWN();
9215}
9216
9217
9218TEST(fadd) {
9219  SETUP();
9220
9221  START();
9222  __ Fmov(s14, -0.0f);
9223  __ Fmov(s15, kFP32PositiveInfinity);
9224  __ Fmov(s16, kFP32NegativeInfinity);
9225  __ Fmov(s17, 3.25f);
9226  __ Fmov(s18, 1.0f);
9227  __ Fmov(s19, 0.0f);
9228
9229  __ Fmov(d26, -0.0);
9230  __ Fmov(d27, kFP64PositiveInfinity);
9231  __ Fmov(d28, kFP64NegativeInfinity);
9232  __ Fmov(d29, 0.0);
9233  __ Fmov(d30, -2.0);
9234  __ Fmov(d31, 2.25);
9235
9236  __ Fadd(s0, s17, s18);
9237  __ Fadd(s1, s18, s19);
9238  __ Fadd(s2, s14, s18);
9239  __ Fadd(s3, s15, s18);
9240  __ Fadd(s4, s16, s18);
9241  __ Fadd(s5, s15, s16);
9242  __ Fadd(s6, s16, s15);
9243
9244  __ Fadd(d7, d30, d31);
9245  __ Fadd(d8, d29, d31);
9246  __ Fadd(d9, d26, d31);
9247  __ Fadd(d10, d27, d31);
9248  __ Fadd(d11, d28, d31);
9249  __ Fadd(d12, d27, d28);
9250  __ Fadd(d13, d28, d27);
9251  END();
9252
9253  RUN();
9254
9255  ASSERT_EQUAL_FP32(4.25, s0);
9256  ASSERT_EQUAL_FP32(1.0, s1);
9257  ASSERT_EQUAL_FP32(1.0, s2);
9258  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
9259  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
9260  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
9261  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
9262  ASSERT_EQUAL_FP64(0.25, d7);
9263  ASSERT_EQUAL_FP64(2.25, d8);
9264  ASSERT_EQUAL_FP64(2.25, d9);
9265  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d10);
9266  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d11);
9267  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
9268  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
9269
9270  TEARDOWN();
9271}
9272
9273
9274TEST(fsub) {
9275  SETUP();
9276
9277  START();
9278  __ Fmov(s14, -0.0f);
9279  __ Fmov(s15, kFP32PositiveInfinity);
9280  __ Fmov(s16, kFP32NegativeInfinity);
9281  __ Fmov(s17, 3.25f);
9282  __ Fmov(s18, 1.0f);
9283  __ Fmov(s19, 0.0f);
9284
9285  __ Fmov(d26, -0.0);
9286  __ Fmov(d27, kFP64PositiveInfinity);
9287  __ Fmov(d28, kFP64NegativeInfinity);
9288  __ Fmov(d29, 0.0);
9289  __ Fmov(d30, -2.0);
9290  __ Fmov(d31, 2.25);
9291
9292  __ Fsub(s0, s17, s18);
9293  __ Fsub(s1, s18, s19);
9294  __ Fsub(s2, s14, s18);
9295  __ Fsub(s3, s18, s15);
9296  __ Fsub(s4, s18, s16);
9297  __ Fsub(s5, s15, s15);
9298  __ Fsub(s6, s16, s16);
9299
9300  __ Fsub(d7, d30, d31);
9301  __ Fsub(d8, d29, d31);
9302  __ Fsub(d9, d26, d31);
9303  __ Fsub(d10, d31, d27);
9304  __ Fsub(d11, d31, d28);
9305  __ Fsub(d12, d27, d27);
9306  __ Fsub(d13, d28, d28);
9307  END();
9308
9309  RUN();
9310
9311  ASSERT_EQUAL_FP32(2.25, s0);
9312  ASSERT_EQUAL_FP32(1.0, s1);
9313  ASSERT_EQUAL_FP32(-1.0, s2);
9314  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
9315  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
9316  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
9317  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
9318  ASSERT_EQUAL_FP64(-4.25, d7);
9319  ASSERT_EQUAL_FP64(-2.25, d8);
9320  ASSERT_EQUAL_FP64(-2.25, d9);
9321  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
9322  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
9323  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
9324  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
9325
9326  TEARDOWN();
9327}
9328
9329
9330TEST(fmul) {
9331  SETUP();
9332
9333  START();
9334  __ Fmov(s14, -0.0f);
9335  __ Fmov(s15, kFP32PositiveInfinity);
9336  __ Fmov(s16, kFP32NegativeInfinity);
9337  __ Fmov(s17, 3.25f);
9338  __ Fmov(s18, 2.0f);
9339  __ Fmov(s19, 0.0f);
9340  __ Fmov(s20, -2.0f);
9341
9342  __ Fmov(d26, -0.0);
9343  __ Fmov(d27, kFP64PositiveInfinity);
9344  __ Fmov(d28, kFP64NegativeInfinity);
9345  __ Fmov(d29, 0.0);
9346  __ Fmov(d30, -2.0);
9347  __ Fmov(d31, 2.25);
9348
9349  __ Fmul(s0, s17, s18);
9350  __ Fmul(s1, s18, s19);
9351  __ Fmul(s2, s14, s14);
9352  __ Fmul(s3, s15, s20);
9353  __ Fmul(s4, s16, s20);
9354  __ Fmul(s5, s15, s19);
9355  __ Fmul(s6, s19, s16);
9356
9357  __ Fmul(d7, d30, d31);
9358  __ Fmul(d8, d29, d31);
9359  __ Fmul(d9, d26, d26);
9360  __ Fmul(d10, d27, d30);
9361  __ Fmul(d11, d28, d30);
9362  __ Fmul(d12, d27, d29);
9363  __ Fmul(d13, d29, d28);
9364  END();
9365
9366  RUN();
9367
9368  ASSERT_EQUAL_FP32(6.5, s0);
9369  ASSERT_EQUAL_FP32(0.0, s1);
9370  ASSERT_EQUAL_FP32(0.0, s2);
9371  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
9372  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
9373  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
9374  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
9375  ASSERT_EQUAL_FP64(-4.5, d7);
9376  ASSERT_EQUAL_FP64(0.0, d8);
9377  ASSERT_EQUAL_FP64(0.0, d9);
9378  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
9379  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
9380  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
9381  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
9382
9383  TEARDOWN();
9384}
9385
9386
9387static void FmaddFmsubHelper(double n, double m, double a,
9388                             double fmadd, double fmsub,
9389                             double fnmadd, double fnmsub) {
9390  SETUP();
9391  START();
9392
9393  __ Fmov(d0, n);
9394  __ Fmov(d1, m);
9395  __ Fmov(d2, a);
9396  __ Fmadd(d28, d0, d1, d2);
9397  __ Fmsub(d29, d0, d1, d2);
9398  __ Fnmadd(d30, d0, d1, d2);
9399  __ Fnmsub(d31, d0, d1, d2);
9400
9401  END();
9402  RUN();
9403
9404  ASSERT_EQUAL_FP64(fmadd, d28);
9405  ASSERT_EQUAL_FP64(fmsub, d29);
9406  ASSERT_EQUAL_FP64(fnmadd, d30);
9407  ASSERT_EQUAL_FP64(fnmsub, d31);
9408
9409  TEARDOWN();
9410}
9411
9412
9413TEST(fmadd_fmsub_double) {
9414  // It's hard to check the result of fused operations because the only way to
9415  // calculate the result is using fma, which is what the simulator uses anyway.
9416
9417  // Basic operation.
9418  FmaddFmsubHelper(1.0, 2.0, 3.0, 5.0, 1.0, -5.0, -1.0);
9419  FmaddFmsubHelper(-1.0, 2.0, 3.0, 1.0, 5.0, -1.0, -5.0);
9420
9421  // Check the sign of exact zeroes.
9422  //               n     m     a     fmadd  fmsub  fnmadd fnmsub
9423  FmaddFmsubHelper(-0.0, +0.0, -0.0, -0.0,  +0.0,  +0.0,  +0.0);
9424  FmaddFmsubHelper(+0.0, +0.0, -0.0, +0.0,  -0.0,  +0.0,  +0.0);
9425  FmaddFmsubHelper(+0.0, +0.0, +0.0, +0.0,  +0.0,  -0.0,  +0.0);
9426  FmaddFmsubHelper(-0.0, +0.0, +0.0, +0.0,  +0.0,  +0.0,  -0.0);
9427  FmaddFmsubHelper(+0.0, -0.0, -0.0, -0.0,  +0.0,  +0.0,  +0.0);
9428  FmaddFmsubHelper(-0.0, -0.0, -0.0, +0.0,  -0.0,  +0.0,  +0.0);
9429  FmaddFmsubHelper(-0.0, -0.0, +0.0, +0.0,  +0.0,  -0.0,  +0.0);
9430  FmaddFmsubHelper(+0.0, -0.0, +0.0, +0.0,  +0.0,  +0.0,  -0.0);
9431
9432  // Check NaN generation.
9433  FmaddFmsubHelper(kFP64PositiveInfinity, 0.0, 42.0,
9434                   kFP64DefaultNaN, kFP64DefaultNaN,
9435                   kFP64DefaultNaN, kFP64DefaultNaN);
9436  FmaddFmsubHelper(0.0, kFP64PositiveInfinity, 42.0,
9437                   kFP64DefaultNaN, kFP64DefaultNaN,
9438                   kFP64DefaultNaN, kFP64DefaultNaN);
9439  FmaddFmsubHelper(kFP64PositiveInfinity, 1.0, kFP64PositiveInfinity,
9440                   kFP64PositiveInfinity,   //  inf + ( inf * 1) = inf
9441                   kFP64DefaultNaN,         //  inf + (-inf * 1) = NaN
9442                   kFP64NegativeInfinity,   // -inf + (-inf * 1) = -inf
9443                   kFP64DefaultNaN);        // -inf + ( inf * 1) = NaN
9444  FmaddFmsubHelper(kFP64NegativeInfinity, 1.0, kFP64PositiveInfinity,
9445                   kFP64DefaultNaN,         //  inf + (-inf * 1) = NaN
9446                   kFP64PositiveInfinity,   //  inf + ( inf * 1) = inf
9447                   kFP64DefaultNaN,         // -inf + ( inf * 1) = NaN
9448                   kFP64NegativeInfinity);  // -inf + (-inf * 1) = -inf
9449}
9450
9451
9452static void FmaddFmsubHelper(float n, float m, float a,
9453                             float fmadd, float fmsub,
9454                             float fnmadd, float fnmsub) {
9455  SETUP();
9456  START();
9457
9458  __ Fmov(s0, n);
9459  __ Fmov(s1, m);
9460  __ Fmov(s2, a);
9461  __ Fmadd(s28, s0, s1, s2);
9462  __ Fmsub(s29, s0, s1, s2);
9463  __ Fnmadd(s30, s0, s1, s2);
9464  __ Fnmsub(s31, s0, s1, s2);
9465
9466  END();
9467  RUN();
9468
9469  ASSERT_EQUAL_FP32(fmadd, s28);
9470  ASSERT_EQUAL_FP32(fmsub, s29);
9471  ASSERT_EQUAL_FP32(fnmadd, s30);
9472  ASSERT_EQUAL_FP32(fnmsub, s31);
9473
9474  TEARDOWN();
9475}
9476
9477
9478TEST(fmadd_fmsub_float) {
9479  // It's hard to check the result of fused operations because the only way to
9480  // calculate the result is using fma, which is what the simulator uses anyway.
9481
9482  // Basic operation.
9483  FmaddFmsubHelper(1.0f, 2.0f, 3.0f, 5.0f, 1.0f, -5.0f, -1.0f);
9484  FmaddFmsubHelper(-1.0f, 2.0f, 3.0f, 1.0f, 5.0f, -1.0f, -5.0f);
9485
9486  // Check the sign of exact zeroes.
9487  //               n      m      a      fmadd  fmsub  fnmadd fnmsub
9488  FmaddFmsubHelper(-0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
9489  FmaddFmsubHelper(+0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
9490  FmaddFmsubHelper(+0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
9491  FmaddFmsubHelper(-0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);
9492  FmaddFmsubHelper(+0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
9493  FmaddFmsubHelper(-0.0f, -0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
9494  FmaddFmsubHelper(-0.0f, -0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
9495  FmaddFmsubHelper(+0.0f, -0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);
9496
9497  // Check NaN generation.
9498  FmaddFmsubHelper(kFP32PositiveInfinity, 0.0f, 42.0f,
9499                   kFP32DefaultNaN, kFP32DefaultNaN,
9500                   kFP32DefaultNaN, kFP32DefaultNaN);
9501  FmaddFmsubHelper(0.0f, kFP32PositiveInfinity, 42.0f,
9502                   kFP32DefaultNaN, kFP32DefaultNaN,
9503                   kFP32DefaultNaN, kFP32DefaultNaN);
9504  FmaddFmsubHelper(kFP32PositiveInfinity, 1.0f, kFP32PositiveInfinity,
9505                   kFP32PositiveInfinity,   //  inf + ( inf * 1) = inf
9506                   kFP32DefaultNaN,         //  inf + (-inf * 1) = NaN
9507                   kFP32NegativeInfinity,   // -inf + (-inf * 1) = -inf
9508                   kFP32DefaultNaN);        // -inf + ( inf * 1) = NaN
9509  FmaddFmsubHelper(kFP32NegativeInfinity, 1.0f, kFP32PositiveInfinity,
9510                   kFP32DefaultNaN,         //  inf + (-inf * 1) = NaN
9511                   kFP32PositiveInfinity,   //  inf + ( inf * 1) = inf
9512                   kFP32DefaultNaN,         // -inf + ( inf * 1) = NaN
9513                   kFP32NegativeInfinity);  // -inf + (-inf * 1) = -inf
9514}
9515
9516
9517TEST(fmadd_fmsub_double_nans) {
9518  // Make sure that NaN propagation works correctly.
9519  double s1 = rawbits_to_double(0x7ff5555511111111);
9520  double s2 = rawbits_to_double(0x7ff5555522222222);
9521  double sa = rawbits_to_double(0x7ff55555aaaaaaaa);
9522  double q1 = rawbits_to_double(0x7ffaaaaa11111111);
9523  double q2 = rawbits_to_double(0x7ffaaaaa22222222);
9524  double qa = rawbits_to_double(0x7ffaaaaaaaaaaaaa);
9525  VIXL_ASSERT(IsSignallingNaN(s1));
9526  VIXL_ASSERT(IsSignallingNaN(s2));
9527  VIXL_ASSERT(IsSignallingNaN(sa));
9528  VIXL_ASSERT(IsQuietNaN(q1));
9529  VIXL_ASSERT(IsQuietNaN(q2));
9530  VIXL_ASSERT(IsQuietNaN(qa));
9531
9532  // The input NaNs after passing through ProcessNaN.
9533  double s1_proc = rawbits_to_double(0x7ffd555511111111);
9534  double s2_proc = rawbits_to_double(0x7ffd555522222222);
9535  double sa_proc = rawbits_to_double(0x7ffd5555aaaaaaaa);
9536  double q1_proc = q1;
9537  double q2_proc = q2;
9538  double qa_proc = qa;
9539  VIXL_ASSERT(IsQuietNaN(s1_proc));
9540  VIXL_ASSERT(IsQuietNaN(s2_proc));
9541  VIXL_ASSERT(IsQuietNaN(sa_proc));
9542  VIXL_ASSERT(IsQuietNaN(q1_proc));
9543  VIXL_ASSERT(IsQuietNaN(q2_proc));
9544  VIXL_ASSERT(IsQuietNaN(qa_proc));
9545
9546  // Negated NaNs as it would be done on ARMv8 hardware.
9547  double s1_proc_neg = rawbits_to_double(0xfffd555511111111);
9548  double sa_proc_neg = rawbits_to_double(0xfffd5555aaaaaaaa);
9549  double q1_proc_neg = rawbits_to_double(0xfffaaaaa11111111);
9550  double qa_proc_neg = rawbits_to_double(0xfffaaaaaaaaaaaaa);
9551  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
9552  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
9553  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
9554  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));
9555
9556  // Quiet NaNs are propagated.
9557  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
9558  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
9559  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9560  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
9561  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9562  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9563  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9564
9565  // Signalling NaNs are propagated, and made quiet.
9566  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9567  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
9568  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9569  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9570  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9571  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9572  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9573
9574  // Signalling NaNs take precedence over quiet NaNs.
9575  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9576  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
9577  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9578  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9579  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9580  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9581  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9582
9583  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
9584  FmaddFmsubHelper(0, kFP64PositiveInfinity, qa,
9585                   kFP64DefaultNaN, kFP64DefaultNaN,
9586                   kFP64DefaultNaN, kFP64DefaultNaN);
9587  FmaddFmsubHelper(kFP64PositiveInfinity, 0, qa,
9588                   kFP64DefaultNaN, kFP64DefaultNaN,
9589                   kFP64DefaultNaN, kFP64DefaultNaN);
9590  FmaddFmsubHelper(0, kFP64NegativeInfinity, qa,
9591                   kFP64DefaultNaN, kFP64DefaultNaN,
9592                   kFP64DefaultNaN, kFP64DefaultNaN);
9593  FmaddFmsubHelper(kFP64NegativeInfinity, 0, qa,
9594                   kFP64DefaultNaN, kFP64DefaultNaN,
9595                   kFP64DefaultNaN, kFP64DefaultNaN);
9596}
9597
9598
9599TEST(fmadd_fmsub_float_nans) {
9600  // Make sure that NaN propagation works correctly.
9601  float s1 = rawbits_to_float(0x7f951111);
9602  float s2 = rawbits_to_float(0x7f952222);
9603  float sa = rawbits_to_float(0x7f95aaaa);
9604  float q1 = rawbits_to_float(0x7fea1111);
9605  float q2 = rawbits_to_float(0x7fea2222);
9606  float qa = rawbits_to_float(0x7feaaaaa);
9607  VIXL_ASSERT(IsSignallingNaN(s1));
9608  VIXL_ASSERT(IsSignallingNaN(s2));
9609  VIXL_ASSERT(IsSignallingNaN(sa));
9610  VIXL_ASSERT(IsQuietNaN(q1));
9611  VIXL_ASSERT(IsQuietNaN(q2));
9612  VIXL_ASSERT(IsQuietNaN(qa));
9613
9614  // The input NaNs after passing through ProcessNaN.
9615  float s1_proc = rawbits_to_float(0x7fd51111);
9616  float s2_proc = rawbits_to_float(0x7fd52222);
9617  float sa_proc = rawbits_to_float(0x7fd5aaaa);
9618  float q1_proc = q1;
9619  float q2_proc = q2;
9620  float qa_proc = qa;
9621  VIXL_ASSERT(IsQuietNaN(s1_proc));
9622  VIXL_ASSERT(IsQuietNaN(s2_proc));
9623  VIXL_ASSERT(IsQuietNaN(sa_proc));
9624  VIXL_ASSERT(IsQuietNaN(q1_proc));
9625  VIXL_ASSERT(IsQuietNaN(q2_proc));
9626  VIXL_ASSERT(IsQuietNaN(qa_proc));
9627
9628  // Negated NaNs as it would be done on ARMv8 hardware.
9629  float s1_proc_neg = rawbits_to_float(0xffd51111);
9630  float sa_proc_neg = rawbits_to_float(0xffd5aaaa);
9631  float q1_proc_neg = rawbits_to_float(0xffea1111);
9632  float qa_proc_neg = rawbits_to_float(0xffeaaaaa);
9633  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
9634  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
9635  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
9636  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));
9637
9638  // Quiet NaNs are propagated.
9639  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
9640  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
9641  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9642  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
9643  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9644  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9645  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
9646
9647  // Signalling NaNs are propagated, and made quiet.
9648  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9649  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
9650  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9651  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9652  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9653  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9654  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9655
9656  // Signalling NaNs take precedence over quiet NaNs.
9657  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9658  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
9659  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9660  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
9661  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9662  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9663  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
9664
9665  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
9666  FmaddFmsubHelper(0, kFP32PositiveInfinity, qa,
9667                   kFP32DefaultNaN, kFP32DefaultNaN,
9668                   kFP32DefaultNaN, kFP32DefaultNaN);
9669  FmaddFmsubHelper(kFP32PositiveInfinity, 0, qa,
9670                   kFP32DefaultNaN, kFP32DefaultNaN,
9671                   kFP32DefaultNaN, kFP32DefaultNaN);
9672  FmaddFmsubHelper(0, kFP32NegativeInfinity, qa,
9673                   kFP32DefaultNaN, kFP32DefaultNaN,
9674                   kFP32DefaultNaN, kFP32DefaultNaN);
9675  FmaddFmsubHelper(kFP32NegativeInfinity, 0, qa,
9676                   kFP32DefaultNaN, kFP32DefaultNaN,
9677                   kFP32DefaultNaN, kFP32DefaultNaN);
9678}
9679
9680
9681TEST(fdiv) {
9682  SETUP();
9683
9684  START();
9685  __ Fmov(s14, -0.0f);
9686  __ Fmov(s15, kFP32PositiveInfinity);
9687  __ Fmov(s16, kFP32NegativeInfinity);
9688  __ Fmov(s17, 3.25f);
9689  __ Fmov(s18, 2.0f);
9690  __ Fmov(s19, 2.0f);
9691  __ Fmov(s20, -2.0f);
9692
9693  __ Fmov(d26, -0.0);
9694  __ Fmov(d27, kFP64PositiveInfinity);
9695  __ Fmov(d28, kFP64NegativeInfinity);
9696  __ Fmov(d29, 0.0);
9697  __ Fmov(d30, -2.0);
9698  __ Fmov(d31, 2.25);
9699
9700  __ Fdiv(s0, s17, s18);
9701  __ Fdiv(s1, s18, s19);
9702  __ Fdiv(s2, s14, s18);
9703  __ Fdiv(s3, s18, s15);
9704  __ Fdiv(s4, s18, s16);
9705  __ Fdiv(s5, s15, s16);
9706  __ Fdiv(s6, s14, s14);
9707
9708  __ Fdiv(d7, d31, d30);
9709  __ Fdiv(d8, d29, d31);
9710  __ Fdiv(d9, d26, d31);
9711  __ Fdiv(d10, d31, d27);
9712  __ Fdiv(d11, d31, d28);
9713  __ Fdiv(d12, d28, d27);
9714  __ Fdiv(d13, d29, d29);
9715  END();
9716
9717  RUN();
9718
9719  ASSERT_EQUAL_FP32(1.625f, s0);
9720  ASSERT_EQUAL_FP32(1.0f, s1);
9721  ASSERT_EQUAL_FP32(-0.0f, s2);
9722  ASSERT_EQUAL_FP32(0.0f, s3);
9723  ASSERT_EQUAL_FP32(-0.0f, s4);
9724  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
9725  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
9726  ASSERT_EQUAL_FP64(-1.125, d7);
9727  ASSERT_EQUAL_FP64(0.0, d8);
9728  ASSERT_EQUAL_FP64(-0.0, d9);
9729  ASSERT_EQUAL_FP64(0.0, d10);
9730  ASSERT_EQUAL_FP64(-0.0, d11);
9731  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
9732  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
9733
9734  TEARDOWN();
9735}
9736
9737
9738static float MinMaxHelper(float n,
9739                          float m,
9740                          bool min,
9741                          float quiet_nan_substitute = 0.0) {
9742  const uint64_t kFP32QuietNaNMask = 0x00400000;
9743  uint32_t raw_n = float_to_rawbits(n);
9744  uint32_t raw_m = float_to_rawbits(m);
9745
9746  if (std::isnan(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
9747    // n is signalling NaN.
9748    return rawbits_to_float(raw_n | kFP32QuietNaNMask);
9749  } else if (std::isnan(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
9750    // m is signalling NaN.
9751    return rawbits_to_float(raw_m | kFP32QuietNaNMask);
9752  } else if (quiet_nan_substitute == 0.0) {
9753    if (std::isnan(n)) {
9754      // n is quiet NaN.
9755      return n;
9756    } else if (std::isnan(m)) {
9757      // m is quiet NaN.
9758      return m;
9759    }
9760  } else {
9761    // Substitute n or m if one is quiet, but not both.
9762    if (std::isnan(n) && !std::isnan(m)) {
9763      // n is quiet NaN: replace with substitute.
9764      n = quiet_nan_substitute;
9765    } else if (!std::isnan(n) && std::isnan(m)) {
9766      // m is quiet NaN: replace with substitute.
9767      m = quiet_nan_substitute;
9768    }
9769  }
9770
9771  if ((n == 0.0) && (m == 0.0) &&
9772      (copysign(1.0, n) != copysign(1.0, m))) {
9773    return min ? -0.0 : 0.0;
9774  }
9775
9776  return min ? fminf(n, m) : fmaxf(n, m);
9777}
9778
9779
9780static double MinMaxHelper(double n,
9781                           double m,
9782                           bool min,
9783                           double quiet_nan_substitute = 0.0) {
9784  const uint64_t kFP64QuietNaNMask = 0x0008000000000000;
9785  uint64_t raw_n = double_to_rawbits(n);
9786  uint64_t raw_m = double_to_rawbits(m);
9787
9788  if (std::isnan(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
9789    // n is signalling NaN.
9790    return rawbits_to_double(raw_n | kFP64QuietNaNMask);
9791  } else if (std::isnan(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
9792    // m is signalling NaN.
9793    return rawbits_to_double(raw_m | kFP64QuietNaNMask);
9794  } else if (quiet_nan_substitute == 0.0) {
9795    if (std::isnan(n)) {
9796      // n is quiet NaN.
9797      return n;
9798    } else if (std::isnan(m)) {
9799      // m is quiet NaN.
9800      return m;
9801    }
9802  } else {
9803    // Substitute n or m if one is quiet, but not both.
9804    if (std::isnan(n) && !std::isnan(m)) {
9805      // n is quiet NaN: replace with substitute.
9806      n = quiet_nan_substitute;
9807    } else if (!std::isnan(n) && std::isnan(m)) {
9808      // m is quiet NaN: replace with substitute.
9809      m = quiet_nan_substitute;
9810    }
9811  }
9812
9813  if ((n == 0.0) && (m == 0.0) &&
9814      (copysign(1.0, n) != copysign(1.0, m))) {
9815    return min ? -0.0 : 0.0;
9816  }
9817
9818  return min ? fmin(n, m) : fmax(n, m);
9819}
9820
9821
9822static void FminFmaxDoubleHelper(double n, double m, double min, double max,
9823                                 double minnm, double maxnm) {
9824  SETUP();
9825
9826  START();
9827  __ Fmov(d0, n);
9828  __ Fmov(d1, m);
9829  __ Fmin(d28, d0, d1);
9830  __ Fmax(d29, d0, d1);
9831  __ Fminnm(d30, d0, d1);
9832  __ Fmaxnm(d31, d0, d1);
9833  END();
9834
9835  RUN();
9836
9837  ASSERT_EQUAL_FP64(min, d28);
9838  ASSERT_EQUAL_FP64(max, d29);
9839  ASSERT_EQUAL_FP64(minnm, d30);
9840  ASSERT_EQUAL_FP64(maxnm, d31);
9841
9842  TEARDOWN();
9843}
9844
9845
9846TEST(fmax_fmin_d) {
9847  // Use non-standard NaNs to check that the payload bits are preserved.
9848  double snan = rawbits_to_double(0x7ff5555512345678);
9849  double qnan = rawbits_to_double(0x7ffaaaaa87654321);
9850
9851  double snan_processed = rawbits_to_double(0x7ffd555512345678);
9852  double qnan_processed = qnan;
9853
9854  VIXL_ASSERT(IsSignallingNaN(snan));
9855  VIXL_ASSERT(IsQuietNaN(qnan));
9856  VIXL_ASSERT(IsQuietNaN(snan_processed));
9857  VIXL_ASSERT(IsQuietNaN(qnan_processed));
9858
9859  // Bootstrap tests.
9860  FminFmaxDoubleHelper(0, 0, 0, 0, 0, 0);
9861  FminFmaxDoubleHelper(0, 1, 0, 1, 0, 1);
9862  FminFmaxDoubleHelper(kFP64PositiveInfinity, kFP64NegativeInfinity,
9863                       kFP64NegativeInfinity, kFP64PositiveInfinity,
9864                       kFP64NegativeInfinity, kFP64PositiveInfinity);
9865  FminFmaxDoubleHelper(snan, 0,
9866                       snan_processed, snan_processed,
9867                       snan_processed, snan_processed);
9868  FminFmaxDoubleHelper(0, snan,
9869                       snan_processed, snan_processed,
9870                       snan_processed, snan_processed);
9871  FminFmaxDoubleHelper(qnan, 0,
9872                       qnan_processed, qnan_processed,
9873                       0, 0);
9874  FminFmaxDoubleHelper(0, qnan,
9875                       qnan_processed, qnan_processed,
9876                       0, 0);
9877  FminFmaxDoubleHelper(qnan, snan,
9878                       snan_processed, snan_processed,
9879                       snan_processed, snan_processed);
9880  FminFmaxDoubleHelper(snan, qnan,
9881                       snan_processed, snan_processed,
9882                       snan_processed, snan_processed);
9883
9884  // Iterate over all combinations of inputs.
9885  double inputs[] = { DBL_MAX, DBL_MIN, 1.0, 0.0,
9886                      -DBL_MAX, -DBL_MIN, -1.0, -0.0,
9887                      kFP64PositiveInfinity, kFP64NegativeInfinity,
9888                      kFP64QuietNaN, kFP64SignallingNaN };
9889
9890  const int count = sizeof(inputs) / sizeof(inputs[0]);
9891
9892  for (int in = 0; in < count; in++) {
9893    double n = inputs[in];
9894    for (int im = 0; im < count; im++) {
9895      double m = inputs[im];
9896      FminFmaxDoubleHelper(n, m,
9897                           MinMaxHelper(n, m, true),
9898                           MinMaxHelper(n, m, false),
9899                           MinMaxHelper(n, m, true, kFP64PositiveInfinity),
9900                           MinMaxHelper(n, m, false, kFP64NegativeInfinity));
9901    }
9902  }
9903}
9904
9905
9906static void FminFmaxFloatHelper(float n, float m, float min, float max,
9907                                float minnm, float maxnm) {
9908  SETUP();
9909
9910  START();
9911  __ Fmov(s0, n);
9912  __ Fmov(s1, m);
9913  __ Fmin(s28, s0, s1);
9914  __ Fmax(s29, s0, s1);
9915  __ Fminnm(s30, s0, s1);
9916  __ Fmaxnm(s31, s0, s1);
9917  END();
9918
9919  RUN();
9920
9921  ASSERT_EQUAL_FP32(min, s28);
9922  ASSERT_EQUAL_FP32(max, s29);
9923  ASSERT_EQUAL_FP32(minnm, s30);
9924  ASSERT_EQUAL_FP32(maxnm, s31);
9925
9926  TEARDOWN();
9927}
9928
9929
9930TEST(fmax_fmin_s) {
9931  // Use non-standard NaNs to check that the payload bits are preserved.
9932  float snan = rawbits_to_float(0x7f951234);
9933  float qnan = rawbits_to_float(0x7fea8765);
9934
9935  float snan_processed = rawbits_to_float(0x7fd51234);
9936  float qnan_processed = qnan;
9937
9938  VIXL_ASSERT(IsSignallingNaN(snan));
9939  VIXL_ASSERT(IsQuietNaN(qnan));
9940  VIXL_ASSERT(IsQuietNaN(snan_processed));
9941  VIXL_ASSERT(IsQuietNaN(qnan_processed));
9942
9943  // Bootstrap tests.
9944  FminFmaxFloatHelper(0, 0, 0, 0, 0, 0);
9945  FminFmaxFloatHelper(0, 1, 0, 1, 0, 1);
9946  FminFmaxFloatHelper(kFP32PositiveInfinity, kFP32NegativeInfinity,
9947                      kFP32NegativeInfinity, kFP32PositiveInfinity,
9948                      kFP32NegativeInfinity, kFP32PositiveInfinity);
9949  FminFmaxFloatHelper(snan, 0,
9950                      snan_processed, snan_processed,
9951                      snan_processed, snan_processed);
9952  FminFmaxFloatHelper(0, snan,
9953                      snan_processed, snan_processed,
9954                      snan_processed, snan_processed);
9955  FminFmaxFloatHelper(qnan, 0,
9956                      qnan_processed, qnan_processed,
9957                      0, 0);
9958  FminFmaxFloatHelper(0, qnan,
9959                      qnan_processed, qnan_processed,
9960                      0, 0);
9961  FminFmaxFloatHelper(qnan, snan,
9962                      snan_processed, snan_processed,
9963                      snan_processed, snan_processed);
9964  FminFmaxFloatHelper(snan, qnan,
9965                      snan_processed, snan_processed,
9966                      snan_processed, snan_processed);
9967
9968  // Iterate over all combinations of inputs.
9969  float inputs[] = { FLT_MAX, FLT_MIN, 1.0, 0.0,
9970                     -FLT_MAX, -FLT_MIN, -1.0, -0.0,
9971                     kFP32PositiveInfinity, kFP32NegativeInfinity,
9972                     kFP32QuietNaN, kFP32SignallingNaN };
9973
9974  const int count = sizeof(inputs) / sizeof(inputs[0]);
9975
9976  for (int in = 0; in < count; in++) {
9977    float n = inputs[in];
9978    for (int im = 0; im < count; im++) {
9979      float m = inputs[im];
9980      FminFmaxFloatHelper(n, m,
9981                          MinMaxHelper(n, m, true),
9982                          MinMaxHelper(n, m, false),
9983                          MinMaxHelper(n, m, true, kFP32PositiveInfinity),
9984                          MinMaxHelper(n, m, false, kFP32NegativeInfinity));
9985    }
9986  }
9987}
9988
9989
9990TEST(fccmp) {
9991  SETUP();
9992  ALLOW_ASM();
9993
9994  START();
9995  __ Fmov(s16, 0.0);
9996  __ Fmov(s17, 0.5);
9997  __ Fmov(d18, -0.5);
9998  __ Fmov(d19, -1.0);
9999  __ Mov(x20, 0);
10000  __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
10001  __ Fmov(d21, x21);
10002  __ Mov(w22, 0x7f800001);  // Single precision NaN.
10003  __ Fmov(s22, w22);
10004
10005  __ Cmp(x20, 0);
10006  __ Fccmp(s16, s16, NoFlag, eq);
10007  __ Mrs(x0, NZCV);
10008
10009  __ Cmp(x20, 0);
10010  __ Fccmp(s16, s16, VFlag, ne);
10011  __ Mrs(x1, NZCV);
10012
10013  __ Cmp(x20, 0);
10014  __ Fccmp(s16, s17, CFlag, ge);
10015  __ Mrs(x2, NZCV);
10016
10017  __ Cmp(x20, 0);
10018  __ Fccmp(s16, s17, CVFlag, lt);
10019  __ Mrs(x3, NZCV);
10020
10021  __ Cmp(x20, 0);
10022  __ Fccmp(d18, d18, ZFlag, le);
10023  __ Mrs(x4, NZCV);
10024
10025  __ Cmp(x20, 0);
10026  __ Fccmp(d18, d18, ZVFlag, gt);
10027  __ Mrs(x5, NZCV);
10028
10029  __ Cmp(x20, 0);
10030  __ Fccmp(d18, d19, ZCVFlag, ls);
10031  __ Mrs(x6, NZCV);
10032
10033  __ Cmp(x20, 0);
10034  __ Fccmp(d18, d19, NFlag, hi);
10035  __ Mrs(x7, NZCV);
10036
10037  // The Macro Assembler does not allow al or nv as condition.
10038  __ fccmp(s16, s16, NFlag, al);
10039  __ Mrs(x8, NZCV);
10040
10041  __ fccmp(d18, d18, NFlag, nv);
10042  __ Mrs(x9, NZCV);
10043
10044  __ Cmp(x20, 0);
10045  __ Fccmpe(s16, s16, NoFlag, eq);
10046  __ Mrs(x10, NZCV);
10047
10048  __ Cmp(x20, 0);
10049  __ Fccmpe(d18, d19, ZCVFlag, ls);
10050  __ Mrs(x11, NZCV);
10051
10052  __ Cmp(x20, 0);
10053  __ Fccmpe(d21, d21, NoFlag, eq);
10054  __ Mrs(x12, NZCV);
10055
10056  __ Cmp(x20, 0);
10057  __ Fccmpe(s22, s22, NoFlag, eq);
10058  __ Mrs(x13, NZCV);
10059  END();
10060
10061  RUN();
10062
10063  ASSERT_EQUAL_32(ZCFlag, w0);
10064  ASSERT_EQUAL_32(VFlag, w1);
10065  ASSERT_EQUAL_32(NFlag, w2);
10066  ASSERT_EQUAL_32(CVFlag, w3);
10067  ASSERT_EQUAL_32(ZCFlag, w4);
10068  ASSERT_EQUAL_32(ZVFlag, w5);
10069  ASSERT_EQUAL_32(CFlag, w6);
10070  ASSERT_EQUAL_32(NFlag, w7);
10071  ASSERT_EQUAL_32(ZCFlag, w8);
10072  ASSERT_EQUAL_32(ZCFlag, w9);
10073  ASSERT_EQUAL_32(ZCFlag, w10);
10074  ASSERT_EQUAL_32(CFlag, w11);
10075  ASSERT_EQUAL_32(CVFlag, w12);
10076  ASSERT_EQUAL_32(CVFlag, w13);
10077
10078  TEARDOWN();
10079}
10080
10081
10082TEST(fcmp) {
10083  SETUP();
10084
10085  START();
10086
10087  // Some of these tests require a floating-point scratch register assigned to
10088  // the macro assembler, but most do not.
10089  {
10090    UseScratchRegisterScope temps(&masm);
10091    temps.ExcludeAll();
10092    temps.Include(ip0, ip1);
10093
10094    __ Fmov(s8, 0.0);
10095    __ Fmov(s9, 0.5);
10096    __ Mov(w18, 0x7f800001);  // Single precision NaN.
10097    __ Fmov(s18, w18);
10098
10099    __ Fcmp(s8, s8);
10100    __ Mrs(x0, NZCV);
10101    __ Fcmp(s8, s9);
10102    __ Mrs(x1, NZCV);
10103    __ Fcmp(s9, s8);
10104    __ Mrs(x2, NZCV);
10105    __ Fcmp(s8, s18);
10106    __ Mrs(x3, NZCV);
10107    __ Fcmp(s18, s18);
10108    __ Mrs(x4, NZCV);
10109    __ Fcmp(s8, 0.0);
10110    __ Mrs(x5, NZCV);
10111    temps.Include(d0);
10112    __ Fcmp(s8, 255.0);
10113    temps.Exclude(d0);
10114    __ Mrs(x6, NZCV);
10115
10116    __ Fmov(d19, 0.0);
10117    __ Fmov(d20, 0.5);
10118    __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
10119    __ Fmov(d21, x21);
10120
10121    __ Fcmp(d19, d19);
10122    __ Mrs(x10, NZCV);
10123    __ Fcmp(d19, d20);
10124    __ Mrs(x11, NZCV);
10125    __ Fcmp(d20, d19);
10126    __ Mrs(x12, NZCV);
10127    __ Fcmp(d19, d21);
10128    __ Mrs(x13, NZCV);
10129    __ Fcmp(d21, d21);
10130    __ Mrs(x14, NZCV);
10131    __ Fcmp(d19, 0.0);
10132    __ Mrs(x15, NZCV);
10133    temps.Include(d0);
10134    __ Fcmp(d19, 12.3456);
10135    temps.Exclude(d0);
10136    __ Mrs(x16, NZCV);
10137
10138    __ Fcmpe(s8, s8);
10139    __ Mrs(x22, NZCV);
10140    __ Fcmpe(s8, 0.0);
10141    __ Mrs(x23, NZCV);
10142    __ Fcmpe(d19, d19);
10143    __ Mrs(x24, NZCV);
10144    __ Fcmpe(d19, 0.0);
10145    __ Mrs(x25, NZCV);
10146    __ Fcmpe(s18, s18);
10147    __ Mrs(x26, NZCV);
10148    __ Fcmpe(d21, d21);
10149    __ Mrs(x27, NZCV);
10150  }
10151
10152  END();
10153
10154  RUN();
10155
10156  ASSERT_EQUAL_32(ZCFlag, w0);
10157  ASSERT_EQUAL_32(NFlag, w1);
10158  ASSERT_EQUAL_32(CFlag, w2);
10159  ASSERT_EQUAL_32(CVFlag, w3);
10160  ASSERT_EQUAL_32(CVFlag, w4);
10161  ASSERT_EQUAL_32(ZCFlag, w5);
10162  ASSERT_EQUAL_32(NFlag, w6);
10163  ASSERT_EQUAL_32(ZCFlag, w10);
10164  ASSERT_EQUAL_32(NFlag, w11);
10165  ASSERT_EQUAL_32(CFlag, w12);
10166  ASSERT_EQUAL_32(CVFlag, w13);
10167  ASSERT_EQUAL_32(CVFlag, w14);
10168  ASSERT_EQUAL_32(ZCFlag, w15);
10169  ASSERT_EQUAL_32(NFlag, w16);
10170  ASSERT_EQUAL_32(ZCFlag, w22);
10171  ASSERT_EQUAL_32(ZCFlag, w23);
10172  ASSERT_EQUAL_32(ZCFlag, w24);
10173  ASSERT_EQUAL_32(ZCFlag, w25);
10174  ASSERT_EQUAL_32(CVFlag, w26);
10175  ASSERT_EQUAL_32(CVFlag, w27);
10176
10177  TEARDOWN();
10178}
10179
10180
10181TEST(fcsel) {
10182  SETUP();
10183  ALLOW_ASM();
10184
10185  START();
10186  __ Mov(x16, 0);
10187  __ Fmov(s16, 1.0);
10188  __ Fmov(s17, 2.0);
10189  __ Fmov(d18, 3.0);
10190  __ Fmov(d19, 4.0);
10191
10192  __ Cmp(x16, 0);
10193  __ Fcsel(s0, s16, s17, eq);
10194  __ Fcsel(s1, s16, s17, ne);
10195  __ Fcsel(d2, d18, d19, eq);
10196  __ Fcsel(d3, d18, d19, ne);
10197  // The Macro Assembler does not allow al or nv as condition.
10198  __ fcsel(s4, s16, s17, al);
10199  __ fcsel(d5, d18, d19, nv);
10200  END();
10201
10202  RUN();
10203
10204  ASSERT_EQUAL_FP32(1.0, s0);
10205  ASSERT_EQUAL_FP32(2.0, s1);
10206  ASSERT_EQUAL_FP64(3.0, d2);
10207  ASSERT_EQUAL_FP64(4.0, d3);
10208  ASSERT_EQUAL_FP32(1.0, s4);
10209  ASSERT_EQUAL_FP64(3.0, d5);
10210
10211  TEARDOWN();
10212}
10213
10214
10215TEST(fneg) {
10216  SETUP();
10217
10218  START();
10219  __ Fmov(s16, 1.0);
10220  __ Fmov(s17, 0.0);
10221  __ Fmov(s18, kFP32PositiveInfinity);
10222  __ Fmov(d19, 1.0);
10223  __ Fmov(d20, 0.0);
10224  __ Fmov(d21, kFP64PositiveInfinity);
10225
10226  __ Fneg(s0, s16);
10227  __ Fneg(s1, s0);
10228  __ Fneg(s2, s17);
10229  __ Fneg(s3, s2);
10230  __ Fneg(s4, s18);
10231  __ Fneg(s5, s4);
10232  __ Fneg(d6, d19);
10233  __ Fneg(d7, d6);
10234  __ Fneg(d8, d20);
10235  __ Fneg(d9, d8);
10236  __ Fneg(d10, d21);
10237  __ Fneg(d11, d10);
10238  END();
10239
10240  RUN();
10241
10242  ASSERT_EQUAL_FP32(-1.0, s0);
10243  ASSERT_EQUAL_FP32(1.0, s1);
10244  ASSERT_EQUAL_FP32(-0.0, s2);
10245  ASSERT_EQUAL_FP32(0.0, s3);
10246  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
10247  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
10248  ASSERT_EQUAL_FP64(-1.0, d6);
10249  ASSERT_EQUAL_FP64(1.0, d7);
10250  ASSERT_EQUAL_FP64(-0.0, d8);
10251  ASSERT_EQUAL_FP64(0.0, d9);
10252  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
10253  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
10254
10255  TEARDOWN();
10256}
10257
10258
10259TEST(fabs) {
10260  SETUP();
10261
10262  START();
10263  __ Fmov(s16, -1.0);
10264  __ Fmov(s17, -0.0);
10265  __ Fmov(s18, kFP32NegativeInfinity);
10266  __ Fmov(d19, -1.0);
10267  __ Fmov(d20, -0.0);
10268  __ Fmov(d21, kFP64NegativeInfinity);
10269
10270  __ Fabs(s0, s16);
10271  __ Fabs(s1, s0);
10272  __ Fabs(s2, s17);
10273  __ Fabs(s3, s18);
10274  __ Fabs(d4, d19);
10275  __ Fabs(d5, d4);
10276  __ Fabs(d6, d20);
10277  __ Fabs(d7, d21);
10278  END();
10279
10280  RUN();
10281
10282  ASSERT_EQUAL_FP32(1.0, s0);
10283  ASSERT_EQUAL_FP32(1.0, s1);
10284  ASSERT_EQUAL_FP32(0.0, s2);
10285  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
10286  ASSERT_EQUAL_FP64(1.0, d4);
10287  ASSERT_EQUAL_FP64(1.0, d5);
10288  ASSERT_EQUAL_FP64(0.0, d6);
10289  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);
10290
10291  TEARDOWN();
10292}
10293
10294
10295TEST(fsqrt) {
10296  SETUP();
10297
10298  START();
10299  __ Fmov(s16, 0.0);
10300  __ Fmov(s17, 1.0);
10301  __ Fmov(s18, 0.25);
10302  __ Fmov(s19, 65536.0);
10303  __ Fmov(s20, -0.0);
10304  __ Fmov(s21, kFP32PositiveInfinity);
10305  __ Fmov(s22, -1.0);
10306  __ Fmov(d23, 0.0);
10307  __ Fmov(d24, 1.0);
10308  __ Fmov(d25, 0.25);
10309  __ Fmov(d26, 4294967296.0);
10310  __ Fmov(d27, -0.0);
10311  __ Fmov(d28, kFP64PositiveInfinity);
10312  __ Fmov(d29, -1.0);
10313
10314  __ Fsqrt(s0, s16);
10315  __ Fsqrt(s1, s17);
10316  __ Fsqrt(s2, s18);
10317  __ Fsqrt(s3, s19);
10318  __ Fsqrt(s4, s20);
10319  __ Fsqrt(s5, s21);
10320  __ Fsqrt(s6, s22);
10321  __ Fsqrt(d7, d23);
10322  __ Fsqrt(d8, d24);
10323  __ Fsqrt(d9, d25);
10324  __ Fsqrt(d10, d26);
10325  __ Fsqrt(d11, d27);
10326  __ Fsqrt(d12, d28);
10327  __ Fsqrt(d13, d29);
10328  END();
10329
10330  RUN();
10331
10332  ASSERT_EQUAL_FP32(0.0, s0);
10333  ASSERT_EQUAL_FP32(1.0, s1);
10334  ASSERT_EQUAL_FP32(0.5, s2);
10335  ASSERT_EQUAL_FP32(256.0, s3);
10336  ASSERT_EQUAL_FP32(-0.0, s4);
10337  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
10338  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
10339  ASSERT_EQUAL_FP64(0.0, d7);
10340  ASSERT_EQUAL_FP64(1.0, d8);
10341  ASSERT_EQUAL_FP64(0.5, d9);
10342  ASSERT_EQUAL_FP64(65536.0, d10);
10343  ASSERT_EQUAL_FP64(-0.0, d11);
10344  ASSERT_EQUAL_FP64(kFP32PositiveInfinity, d12);
10345  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
10346
10347  TEARDOWN();
10348}
10349
10350
10351TEST(frinta) {
10352  SETUP();
10353
10354  START();
10355  __ Fmov(s16, 1.0);
10356  __ Fmov(s17, 1.1);
10357  __ Fmov(s18, 1.5);
10358  __ Fmov(s19, 1.9);
10359  __ Fmov(s20, 2.5);
10360  __ Fmov(s21, -1.5);
10361  __ Fmov(s22, -2.5);
10362  __ Fmov(s23, kFP32PositiveInfinity);
10363  __ Fmov(s24, kFP32NegativeInfinity);
10364  __ Fmov(s25, 0.0);
10365  __ Fmov(s26, -0.0);
10366  __ Fmov(s27, -0.2);
10367
10368  __ Frinta(s0, s16);
10369  __ Frinta(s1, s17);
10370  __ Frinta(s2, s18);
10371  __ Frinta(s3, s19);
10372  __ Frinta(s4, s20);
10373  __ Frinta(s5, s21);
10374  __ Frinta(s6, s22);
10375  __ Frinta(s7, s23);
10376  __ Frinta(s8, s24);
10377  __ Frinta(s9, s25);
10378  __ Frinta(s10, s26);
10379  __ Frinta(s11, s27);
10380
10381  __ Fmov(d16, 1.0);
10382  __ Fmov(d17, 1.1);
10383  __ Fmov(d18, 1.5);
10384  __ Fmov(d19, 1.9);
10385  __ Fmov(d20, 2.5);
10386  __ Fmov(d21, -1.5);
10387  __ Fmov(d22, -2.5);
10388  __ Fmov(d23, kFP32PositiveInfinity);
10389  __ Fmov(d24, kFP32NegativeInfinity);
10390  __ Fmov(d25, 0.0);
10391  __ Fmov(d26, -0.0);
10392  __ Fmov(d27, -0.2);
10393
10394  __ Frinta(d12, d16);
10395  __ Frinta(d13, d17);
10396  __ Frinta(d14, d18);
10397  __ Frinta(d15, d19);
10398  __ Frinta(d16, d20);
10399  __ Frinta(d17, d21);
10400  __ Frinta(d18, d22);
10401  __ Frinta(d19, d23);
10402  __ Frinta(d20, d24);
10403  __ Frinta(d21, d25);
10404  __ Frinta(d22, d26);
10405  __ Frinta(d23, d27);
10406  END();
10407
10408  RUN();
10409
10410  ASSERT_EQUAL_FP32(1.0, s0);
10411  ASSERT_EQUAL_FP32(1.0, s1);
10412  ASSERT_EQUAL_FP32(2.0, s2);
10413  ASSERT_EQUAL_FP32(2.0, s3);
10414  ASSERT_EQUAL_FP32(3.0, s4);
10415  ASSERT_EQUAL_FP32(-2.0, s5);
10416  ASSERT_EQUAL_FP32(-3.0, s6);
10417  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
10418  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
10419  ASSERT_EQUAL_FP32(0.0, s9);
10420  ASSERT_EQUAL_FP32(-0.0, s10);
10421  ASSERT_EQUAL_FP32(-0.0, s11);
10422  ASSERT_EQUAL_FP64(1.0, d12);
10423  ASSERT_EQUAL_FP64(1.0, d13);
10424  ASSERT_EQUAL_FP64(2.0, d14);
10425  ASSERT_EQUAL_FP64(2.0, d15);
10426  ASSERT_EQUAL_FP64(3.0, d16);
10427  ASSERT_EQUAL_FP64(-2.0, d17);
10428  ASSERT_EQUAL_FP64(-3.0, d18);
10429  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
10430  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
10431  ASSERT_EQUAL_FP64(0.0, d21);
10432  ASSERT_EQUAL_FP64(-0.0, d22);
10433  ASSERT_EQUAL_FP64(-0.0, d23);
10434
10435  TEARDOWN();
10436}
10437
10438
10439TEST(frinti) {
10440  // VIXL only supports the round-to-nearest FPCR mode, so this test has the
10441  // same results as frintn.
10442  SETUP();
10443
10444  START();
10445  __ Fmov(s16, 1.0);
10446  __ Fmov(s17, 1.1);
10447  __ Fmov(s18, 1.5);
10448  __ Fmov(s19, 1.9);
10449  __ Fmov(s20, 2.5);
10450  __ Fmov(s21, -1.5);
10451  __ Fmov(s22, -2.5);
10452  __ Fmov(s23, kFP32PositiveInfinity);
10453  __ Fmov(s24, kFP32NegativeInfinity);
10454  __ Fmov(s25, 0.0);
10455  __ Fmov(s26, -0.0);
10456  __ Fmov(s27, -0.2);
10457
10458  __ Frinti(s0, s16);
10459  __ Frinti(s1, s17);
10460  __ Frinti(s2, s18);
10461  __ Frinti(s3, s19);
10462  __ Frinti(s4, s20);
10463  __ Frinti(s5, s21);
10464  __ Frinti(s6, s22);
10465  __ Frinti(s7, s23);
10466  __ Frinti(s8, s24);
10467  __ Frinti(s9, s25);
10468  __ Frinti(s10, s26);
10469  __ Frinti(s11, s27);
10470
10471  __ Fmov(d16, 1.0);
10472  __ Fmov(d17, 1.1);
10473  __ Fmov(d18, 1.5);
10474  __ Fmov(d19, 1.9);
10475  __ Fmov(d20, 2.5);
10476  __ Fmov(d21, -1.5);
10477  __ Fmov(d22, -2.5);
10478  __ Fmov(d23, kFP32PositiveInfinity);
10479  __ Fmov(d24, kFP32NegativeInfinity);
10480  __ Fmov(d25, 0.0);
10481  __ Fmov(d26, -0.0);
10482  __ Fmov(d27, -0.2);
10483
10484  __ Frinti(d12, d16);
10485  __ Frinti(d13, d17);
10486  __ Frinti(d14, d18);
10487  __ Frinti(d15, d19);
10488  __ Frinti(d16, d20);
10489  __ Frinti(d17, d21);
10490  __ Frinti(d18, d22);
10491  __ Frinti(d19, d23);
10492  __ Frinti(d20, d24);
10493  __ Frinti(d21, d25);
10494  __ Frinti(d22, d26);
10495  __ Frinti(d23, d27);
10496  END();
10497
10498  RUN();
10499
10500  ASSERT_EQUAL_FP32(1.0, s0);
10501  ASSERT_EQUAL_FP32(1.0, s1);
10502  ASSERT_EQUAL_FP32(2.0, s2);
10503  ASSERT_EQUAL_FP32(2.0, s3);
10504  ASSERT_EQUAL_FP32(2.0, s4);
10505  ASSERT_EQUAL_FP32(-2.0, s5);
10506  ASSERT_EQUAL_FP32(-2.0, s6);
10507  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
10508  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
10509  ASSERT_EQUAL_FP32(0.0, s9);
10510  ASSERT_EQUAL_FP32(-0.0, s10);
10511  ASSERT_EQUAL_FP32(-0.0, s11);
10512  ASSERT_EQUAL_FP64(1.0, d12);
10513  ASSERT_EQUAL_FP64(1.0, d13);
10514  ASSERT_EQUAL_FP64(2.0, d14);
10515  ASSERT_EQUAL_FP64(2.0, d15);
10516  ASSERT_EQUAL_FP64(2.0, d16);
10517  ASSERT_EQUAL_FP64(-2.0, d17);
10518  ASSERT_EQUAL_FP64(-2.0, d18);
10519  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
10520  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
10521  ASSERT_EQUAL_FP64(0.0, d21);
10522  ASSERT_EQUAL_FP64(-0.0, d22);
10523  ASSERT_EQUAL_FP64(-0.0, d23);
10524
10525  TEARDOWN();
10526}
10527
10528
10529TEST(frintm) {
10530  SETUP();
10531
10532  START();
10533  __ Fmov(s16, 1.0);
10534  __ Fmov(s17, 1.1);
10535  __ Fmov(s18, 1.5);
10536  __ Fmov(s19, 1.9);
10537  __ Fmov(s20, 2.5);
10538  __ Fmov(s21, -1.5);
10539  __ Fmov(s22, -2.5);
10540  __ Fmov(s23, kFP32PositiveInfinity);
10541  __ Fmov(s24, kFP32NegativeInfinity);
10542  __ Fmov(s25, 0.0);
10543  __ Fmov(s26, -0.0);
10544  __ Fmov(s27, -0.2);
10545
10546  __ Frintm(s0, s16);
10547  __ Frintm(s1, s17);
10548  __ Frintm(s2, s18);
10549  __ Frintm(s3, s19);
10550  __ Frintm(s4, s20);
10551  __ Frintm(s5, s21);
10552  __ Frintm(s6, s22);
10553  __ Frintm(s7, s23);
10554  __ Frintm(s8, s24);
10555  __ Frintm(s9, s25);
10556  __ Frintm(s10, s26);
10557  __ Frintm(s11, s27);
10558
10559  __ Fmov(d16, 1.0);
10560  __ Fmov(d17, 1.1);
10561  __ Fmov(d18, 1.5);
10562  __ Fmov(d19, 1.9);
10563  __ Fmov(d20, 2.5);
10564  __ Fmov(d21, -1.5);
10565  __ Fmov(d22, -2.5);
10566  __ Fmov(d23, kFP32PositiveInfinity);
10567  __ Fmov(d24, kFP32NegativeInfinity);
10568  __ Fmov(d25, 0.0);
10569  __ Fmov(d26, -0.0);
10570  __ Fmov(d27, -0.2);
10571
10572  __ Frintm(d12, d16);
10573  __ Frintm(d13, d17);
10574  __ Frintm(d14, d18);
10575  __ Frintm(d15, d19);
10576  __ Frintm(d16, d20);
10577  __ Frintm(d17, d21);
10578  __ Frintm(d18, d22);
10579  __ Frintm(d19, d23);
10580  __ Frintm(d20, d24);
10581  __ Frintm(d21, d25);
10582  __ Frintm(d22, d26);
10583  __ Frintm(d23, d27);
10584  END();
10585
10586  RUN();
10587
10588  ASSERT_EQUAL_FP32(1.0, s0);
10589  ASSERT_EQUAL_FP32(1.0, s1);
10590  ASSERT_EQUAL_FP32(1.0, s2);
10591  ASSERT_EQUAL_FP32(1.0, s3);
10592  ASSERT_EQUAL_FP32(2.0, s4);
10593  ASSERT_EQUAL_FP32(-2.0, s5);
10594  ASSERT_EQUAL_FP32(-3.0, s6);
10595  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
10596  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
10597  ASSERT_EQUAL_FP32(0.0, s9);
10598  ASSERT_EQUAL_FP32(-0.0, s10);
10599  ASSERT_EQUAL_FP32(-1.0, s11);
10600  ASSERT_EQUAL_FP64(1.0, d12);
10601  ASSERT_EQUAL_FP64(1.0, d13);
10602  ASSERT_EQUAL_FP64(1.0, d14);
10603  ASSERT_EQUAL_FP64(1.0, d15);
10604  ASSERT_EQUAL_FP64(2.0, d16);
10605  ASSERT_EQUAL_FP64(-2.0, d17);
10606  ASSERT_EQUAL_FP64(-3.0, d18);
10607  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
10608  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
10609  ASSERT_EQUAL_FP64(0.0, d21);
10610  ASSERT_EQUAL_FP64(-0.0, d22);
10611  ASSERT_EQUAL_FP64(-1.0, d23);
10612
10613  TEARDOWN();
10614}
10615
10616
10617TEST(frintn) {
10618  SETUP();
10619
10620  START();
10621  __ Fmov(s16, 1.0);
10622  __ Fmov(s17, 1.1);
10623  __ Fmov(s18, 1.5);
10624  __ Fmov(s19, 1.9);
10625  __ Fmov(s20, 2.5);
10626  __ Fmov(s21, -1.5);
10627  __ Fmov(s22, -2.5);
10628  __ Fmov(s23, kFP32PositiveInfinity);
10629  __ Fmov(s24, kFP32NegativeInfinity);
10630  __ Fmov(s25, 0.0);
10631  __ Fmov(s26, -0.0);
10632  __ Fmov(s27, -0.2);
10633
10634  __ Frintn(s0, s16);
10635  __ Frintn(s1, s17);
10636  __ Frintn(s2, s18);
10637  __ Frintn(s3, s19);
10638  __ Frintn(s4, s20);
10639  __ Frintn(s5, s21);
10640  __ Frintn(s6, s22);
10641  __ Frintn(s7, s23);
10642  __ Frintn(s8, s24);
10643  __ Frintn(s9, s25);
10644  __ Frintn(s10, s26);
10645  __ Frintn(s11, s27);
10646
10647  __ Fmov(d16, 1.0);
10648  __ Fmov(d17, 1.1);
10649  __ Fmov(d18, 1.5);
10650  __ Fmov(d19, 1.9);
10651  __ Fmov(d20, 2.5);
10652  __ Fmov(d21, -1.5);
10653  __ Fmov(d22, -2.5);
10654  __ Fmov(d23, kFP32PositiveInfinity);
10655  __ Fmov(d24, kFP32NegativeInfinity);
10656  __ Fmov(d25, 0.0);
10657  __ Fmov(d26, -0.0);
10658  __ Fmov(d27, -0.2);
10659
10660  __ Frintn(d12, d16);
10661  __ Frintn(d13, d17);
10662  __ Frintn(d14, d18);
10663  __ Frintn(d15, d19);
10664  __ Frintn(d16, d20);
10665  __ Frintn(d17, d21);
10666  __ Frintn(d18, d22);
10667  __ Frintn(d19, d23);
10668  __ Frintn(d20, d24);
10669  __ Frintn(d21, d25);
10670  __ Frintn(d22, d26);
10671  __ Frintn(d23, d27);
10672  END();
10673
10674  RUN();
10675
10676  ASSERT_EQUAL_FP32(1.0, s0);
10677  ASSERT_EQUAL_FP32(1.0, s1);
10678  ASSERT_EQUAL_FP32(2.0, s2);
10679  ASSERT_EQUAL_FP32(2.0, s3);
10680  ASSERT_EQUAL_FP32(2.0, s4);
10681  ASSERT_EQUAL_FP32(-2.0, s5);
10682  ASSERT_EQUAL_FP32(-2.0, s6);
10683  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
10684  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
10685  ASSERT_EQUAL_FP32(0.0, s9);
10686  ASSERT_EQUAL_FP32(-0.0, s10);
10687  ASSERT_EQUAL_FP32(-0.0, s11);
10688  ASSERT_EQUAL_FP64(1.0, d12);
10689  ASSERT_EQUAL_FP64(1.0, d13);
10690  ASSERT_EQUAL_FP64(2.0, d14);
10691  ASSERT_EQUAL_FP64(2.0, d15);
10692  ASSERT_EQUAL_FP64(2.0, d16);
10693  ASSERT_EQUAL_FP64(-2.0, d17);
10694  ASSERT_EQUAL_FP64(-2.0, d18);
10695  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
10696  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
10697  ASSERT_EQUAL_FP64(0.0, d21);
10698  ASSERT_EQUAL_FP64(-0.0, d22);
10699  ASSERT_EQUAL_FP64(-0.0, d23);
10700
10701  TEARDOWN();
10702}
10703
10704
10705TEST(frintp) {
10706  SETUP();
10707
10708  START();
10709  __ Fmov(s16, 1.0);
10710  __ Fmov(s17, 1.1);
10711  __ Fmov(s18, 1.5);
10712  __ Fmov(s19, 1.9);
10713  __ Fmov(s20, 2.5);
10714  __ Fmov(s21, -1.5);
10715  __ Fmov(s22, -2.5);
10716  __ Fmov(s23, kFP32PositiveInfinity);
10717  __ Fmov(s24, kFP32NegativeInfinity);
10718  __ Fmov(s25, 0.0);
10719  __ Fmov(s26, -0.0);
10720  __ Fmov(s27, -0.2);
10721
10722  __ Frintp(s0, s16);
10723  __ Frintp(s1, s17);
10724  __ Frintp(s2, s18);
10725  __ Frintp(s3, s19);
10726  __ Frintp(s4, s20);
10727  __ Frintp(s5, s21);
10728  __ Frintp(s6, s22);
10729  __ Frintp(s7, s23);
10730  __ Frintp(s8, s24);
10731  __ Frintp(s9, s25);
10732  __ Frintp(s10, s26);
10733  __ Frintp(s11, s27);
10734
10735  __ Fmov(d16, 1.0);
10736  __ Fmov(d17, 1.1);
10737  __ Fmov(d18, 1.5);
10738  __ Fmov(d19, 1.9);
10739  __ Fmov(d20, 2.5);
10740  __ Fmov(d21, -1.5);
10741  __ Fmov(d22, -2.5);
10742  __ Fmov(d23, kFP32PositiveInfinity);
10743  __ Fmov(d24, kFP32NegativeInfinity);
10744  __ Fmov(d25, 0.0);
10745  __ Fmov(d26, -0.0);
10746  __ Fmov(d27, -0.2);
10747
10748  __ Frintp(d12, d16);
10749  __ Frintp(d13, d17);
10750  __ Frintp(d14, d18);
10751  __ Frintp(d15, d19);
10752  __ Frintp(d16, d20);
10753  __ Frintp(d17, d21);
10754  __ Frintp(d18, d22);
10755  __ Frintp(d19, d23);
10756  __ Frintp(d20, d24);
10757  __ Frintp(d21, d25);
10758  __ Frintp(d22, d26);
10759  __ Frintp(d23, d27);
10760  END();
10761
10762  RUN();
10763
10764  ASSERT_EQUAL_FP32(1.0, s0);
10765  ASSERT_EQUAL_FP32(2.0, s1);
10766  ASSERT_EQUAL_FP32(2.0, s2);
10767  ASSERT_EQUAL_FP32(2.0, s3);
10768  ASSERT_EQUAL_FP32(3.0, s4);
10769  ASSERT_EQUAL_FP32(-1.0, s5);
10770  ASSERT_EQUAL_FP32(-2.0, s6);
10771  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
10772  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
10773  ASSERT_EQUAL_FP32(0.0, s9);
10774  ASSERT_EQUAL_FP32(-0.0, s10);
10775  ASSERT_EQUAL_FP32(-0.0, s11);
10776  ASSERT_EQUAL_FP64(1.0, d12);
10777  ASSERT_EQUAL_FP64(2.0, d13);
10778  ASSERT_EQUAL_FP64(2.0, d14);
10779  ASSERT_EQUAL_FP64(2.0, d15);
10780  ASSERT_EQUAL_FP64(3.0, d16);
10781  ASSERT_EQUAL_FP64(-1.0, d17);
10782  ASSERT_EQUAL_FP64(-2.0, d18);
10783  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
10784  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
10785  ASSERT_EQUAL_FP64(0.0, d21);
10786  ASSERT_EQUAL_FP64(-0.0, d22);
10787  ASSERT_EQUAL_FP64(-0.0, d23);
10788
10789  TEARDOWN();
10790}
10791
10792
10793TEST(frintx) {
10794  // VIXL only supports the round-to-nearest FPCR mode, and it doesn't support
10795  // FP exceptions, so this test has the same results as frintn (and frinti).
10796  SETUP();
10797
10798  START();
10799  __ Fmov(s16, 1.0);
10800  __ Fmov(s17, 1.1);
10801  __ Fmov(s18, 1.5);
10802  __ Fmov(s19, 1.9);
10803  __ Fmov(s20, 2.5);
10804  __ Fmov(s21, -1.5);
10805  __ Fmov(s22, -2.5);
10806  __ Fmov(s23, kFP32PositiveInfinity);
10807  __ Fmov(s24, kFP32NegativeInfinity);
10808  __ Fmov(s25, 0.0);
10809  __ Fmov(s26, -0.0);
10810  __ Fmov(s27, -0.2);
10811
10812  __ Frintx(s0, s16);
10813  __ Frintx(s1, s17);
10814  __ Frintx(s2, s18);
10815  __ Frintx(s3, s19);
10816  __ Frintx(s4, s20);
10817  __ Frintx(s5, s21);
10818  __ Frintx(s6, s22);
10819  __ Frintx(s7, s23);
10820  __ Frintx(s8, s24);
10821  __ Frintx(s9, s25);
10822  __ Frintx(s10, s26);
10823  __ Frintx(s11, s27);
10824
10825  __ Fmov(d16, 1.0);
10826  __ Fmov(d17, 1.1);
10827  __ Fmov(d18, 1.5);
10828  __ Fmov(d19, 1.9);
10829  __ Fmov(d20, 2.5);
10830  __ Fmov(d21, -1.5);
10831  __ Fmov(d22, -2.5);
10832  __ Fmov(d23, kFP32PositiveInfinity);
10833  __ Fmov(d24, kFP32NegativeInfinity);
10834  __ Fmov(d25, 0.0);
10835  __ Fmov(d26, -0.0);
10836  __ Fmov(d27, -0.2);
10837
10838  __ Frintx(d12, d16);
10839  __ Frintx(d13, d17);
10840  __ Frintx(d14, d18);
10841  __ Frintx(d15, d19);
10842  __ Frintx(d16, d20);
10843  __ Frintx(d17, d21);
10844  __ Frintx(d18, d22);
10845  __ Frintx(d19, d23);
10846  __ Frintx(d20, d24);
10847  __ Frintx(d21, d25);
10848  __ Frintx(d22, d26);
10849  __ Frintx(d23, d27);
10850  END();
10851
10852  RUN();
10853
10854  ASSERT_EQUAL_FP32(1.0, s0);
10855  ASSERT_EQUAL_FP32(1.0, s1);
10856  ASSERT_EQUAL_FP32(2.0, s2);
10857  ASSERT_EQUAL_FP32(2.0, s3);
10858  ASSERT_EQUAL_FP32(2.0, s4);
10859  ASSERT_EQUAL_FP32(-2.0, s5);
10860  ASSERT_EQUAL_FP32(-2.0, s6);
10861  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
10862  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
10863  ASSERT_EQUAL_FP32(0.0, s9);
10864  ASSERT_EQUAL_FP32(-0.0, s10);
10865  ASSERT_EQUAL_FP32(-0.0, s11);
10866  ASSERT_EQUAL_FP64(1.0, d12);
10867  ASSERT_EQUAL_FP64(1.0, d13);
10868  ASSERT_EQUAL_FP64(2.0, d14);
10869  ASSERT_EQUAL_FP64(2.0, d15);
10870  ASSERT_EQUAL_FP64(2.0, d16);
10871  ASSERT_EQUAL_FP64(-2.0, d17);
10872  ASSERT_EQUAL_FP64(-2.0, d18);
10873  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
10874  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
10875  ASSERT_EQUAL_FP64(0.0, d21);
10876  ASSERT_EQUAL_FP64(-0.0, d22);
10877  ASSERT_EQUAL_FP64(-0.0, d23);
10878
10879  TEARDOWN();
10880}
10881
10882
10883TEST(frintz) {
10884  SETUP();
10885
10886  START();
10887  __ Fmov(s16, 1.0);
10888  __ Fmov(s17, 1.1);
10889  __ Fmov(s18, 1.5);
10890  __ Fmov(s19, 1.9);
10891  __ Fmov(s20, 2.5);
10892  __ Fmov(s21, -1.5);
10893  __ Fmov(s22, -2.5);
10894  __ Fmov(s23, kFP32PositiveInfinity);
10895  __ Fmov(s24, kFP32NegativeInfinity);
10896  __ Fmov(s25, 0.0);
10897  __ Fmov(s26, -0.0);
10898
10899  __ Frintz(s0, s16);
10900  __ Frintz(s1, s17);
10901  __ Frintz(s2, s18);
10902  __ Frintz(s3, s19);
10903  __ Frintz(s4, s20);
10904  __ Frintz(s5, s21);
10905  __ Frintz(s6, s22);
10906  __ Frintz(s7, s23);
10907  __ Frintz(s8, s24);
10908  __ Frintz(s9, s25);
10909  __ Frintz(s10, s26);
10910
10911  __ Fmov(d16, 1.0);
10912  __ Fmov(d17, 1.1);
10913  __ Fmov(d18, 1.5);
10914  __ Fmov(d19, 1.9);
10915  __ Fmov(d20, 2.5);
10916  __ Fmov(d21, -1.5);
10917  __ Fmov(d22, -2.5);
10918  __ Fmov(d23, kFP32PositiveInfinity);
10919  __ Fmov(d24, kFP32NegativeInfinity);
10920  __ Fmov(d25, 0.0);
10921  __ Fmov(d26, -0.0);
10922
10923  __ Frintz(d11, d16);
10924  __ Frintz(d12, d17);
10925  __ Frintz(d13, d18);
10926  __ Frintz(d14, d19);
10927  __ Frintz(d15, d20);
10928  __ Frintz(d16, d21);
10929  __ Frintz(d17, d22);
10930  __ Frintz(d18, d23);
10931  __ Frintz(d19, d24);
10932  __ Frintz(d20, d25);
10933  __ Frintz(d21, d26);
10934  END();
10935
10936  RUN();
10937
10938  ASSERT_EQUAL_FP32(1.0, s0);
10939  ASSERT_EQUAL_FP32(1.0, s1);
10940  ASSERT_EQUAL_FP32(1.0, s2);
10941  ASSERT_EQUAL_FP32(1.0, s3);
10942  ASSERT_EQUAL_FP32(2.0, s4);
10943  ASSERT_EQUAL_FP32(-1.0, s5);
10944  ASSERT_EQUAL_FP32(-2.0, s6);
10945  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
10946  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
10947  ASSERT_EQUAL_FP32(0.0, s9);
10948  ASSERT_EQUAL_FP32(-0.0, s10);
10949  ASSERT_EQUAL_FP64(1.0, d11);
10950  ASSERT_EQUAL_FP64(1.0, d12);
10951  ASSERT_EQUAL_FP64(1.0, d13);
10952  ASSERT_EQUAL_FP64(1.0, d14);
10953  ASSERT_EQUAL_FP64(2.0, d15);
10954  ASSERT_EQUAL_FP64(-1.0, d16);
10955  ASSERT_EQUAL_FP64(-2.0, d17);
10956  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d18);
10957  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d19);
10958  ASSERT_EQUAL_FP64(0.0, d20);
10959  ASSERT_EQUAL_FP64(-0.0, d21);
10960
10961  TEARDOWN();
10962}
10963
10964
10965TEST(fcvt_ds) {
10966  SETUP();
10967
10968  START();
10969  __ Fmov(s16, 1.0);
10970  __ Fmov(s17, 1.1);
10971  __ Fmov(s18, 1.5);
10972  __ Fmov(s19, 1.9);
10973  __ Fmov(s20, 2.5);
10974  __ Fmov(s21, -1.5);
10975  __ Fmov(s22, -2.5);
10976  __ Fmov(s23, kFP32PositiveInfinity);
10977  __ Fmov(s24, kFP32NegativeInfinity);
10978  __ Fmov(s25, 0.0);
10979  __ Fmov(s26, -0.0);
10980  __ Fmov(s27, FLT_MAX);
10981  __ Fmov(s28, FLT_MIN);
10982  __ Fmov(s29, rawbits_to_float(0x7fc12345));   // Quiet NaN.
10983  __ Fmov(s30, rawbits_to_float(0x7f812345));   // Signalling NaN.
10984
10985  __ Fcvt(d0, s16);
10986  __ Fcvt(d1, s17);
10987  __ Fcvt(d2, s18);
10988  __ Fcvt(d3, s19);
10989  __ Fcvt(d4, s20);
10990  __ Fcvt(d5, s21);
10991  __ Fcvt(d6, s22);
10992  __ Fcvt(d7, s23);
10993  __ Fcvt(d8, s24);
10994  __ Fcvt(d9, s25);
10995  __ Fcvt(d10, s26);
10996  __ Fcvt(d11, s27);
10997  __ Fcvt(d12, s28);
10998  __ Fcvt(d13, s29);
10999  __ Fcvt(d14, s30);
11000  END();
11001
11002  RUN();
11003
11004  ASSERT_EQUAL_FP64(1.0f, d0);
11005  ASSERT_EQUAL_FP64(1.1f, d1);
11006  ASSERT_EQUAL_FP64(1.5f, d2);
11007  ASSERT_EQUAL_FP64(1.9f, d3);
11008  ASSERT_EQUAL_FP64(2.5f, d4);
11009  ASSERT_EQUAL_FP64(-1.5f, d5);
11010  ASSERT_EQUAL_FP64(-2.5f, d6);
11011  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);
11012  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d8);
11013  ASSERT_EQUAL_FP64(0.0f, d9);
11014  ASSERT_EQUAL_FP64(-0.0f, d10);
11015  ASSERT_EQUAL_FP64(FLT_MAX, d11);
11016  ASSERT_EQUAL_FP64(FLT_MIN, d12);
11017
11018  // Check that the NaN payload is preserved according to A64 conversion rules:
11019  //  - The sign bit is preserved.
11020  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
11021  //  - The remaining mantissa bits are copied until they run out.
11022  //  - The low-order bits that haven't already been assigned are set to 0.
11023  ASSERT_EQUAL_FP64(rawbits_to_double(0x7ff82468a0000000), d13);
11024  ASSERT_EQUAL_FP64(rawbits_to_double(0x7ff82468a0000000), d14);
11025
11026  TEARDOWN();
11027}
11028
11029
11030TEST(fcvt_sd) {
11031  // Test simple conversions here. Complex behaviour (such as rounding
11032  // specifics) are tested in the simulator tests.
11033
11034  SETUP();
11035
11036  START();
11037  __ Fmov(d16, 1.0);
11038  __ Fmov(d17, 1.1);
11039  __ Fmov(d18, 1.5);
11040  __ Fmov(d19, 1.9);
11041  __ Fmov(d20, 2.5);
11042  __ Fmov(d21, -1.5);
11043  __ Fmov(d22, -2.5);
11044  __ Fmov(d23, kFP32PositiveInfinity);
11045  __ Fmov(d24, kFP32NegativeInfinity);
11046  __ Fmov(d25, 0.0);
11047  __ Fmov(d26, -0.0);
11048  __ Fmov(d27, FLT_MAX);
11049  __ Fmov(d28, FLT_MIN);
11050  __ Fmov(d29, rawbits_to_double(0x7ff82468a0000000));   // Quiet NaN.
11051  __ Fmov(d30, rawbits_to_double(0x7ff02468a0000000));   // Signalling NaN.
11052
11053  __ Fcvt(s0, d16);
11054  __ Fcvt(s1, d17);
11055  __ Fcvt(s2, d18);
11056  __ Fcvt(s3, d19);
11057  __ Fcvt(s4, d20);
11058  __ Fcvt(s5, d21);
11059  __ Fcvt(s6, d22);
11060  __ Fcvt(s7, d23);
11061  __ Fcvt(s8, d24);
11062  __ Fcvt(s9, d25);
11063  __ Fcvt(s10, d26);
11064  __ Fcvt(s11, d27);
11065  __ Fcvt(s12, d28);
11066  __ Fcvt(s13, d29);
11067  __ Fcvt(s14, d30);
11068  END();
11069
11070  RUN();
11071
11072  ASSERT_EQUAL_FP32(1.0f, s0);
11073  ASSERT_EQUAL_FP32(1.1f, s1);
11074  ASSERT_EQUAL_FP32(1.5f, s2);
11075  ASSERT_EQUAL_FP32(1.9f, s3);
11076  ASSERT_EQUAL_FP32(2.5f, s4);
11077  ASSERT_EQUAL_FP32(-1.5f, s5);
11078  ASSERT_EQUAL_FP32(-2.5f, s6);
11079  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
11080  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
11081  ASSERT_EQUAL_FP32(0.0f, s9);
11082  ASSERT_EQUAL_FP32(-0.0f, s10);
11083  ASSERT_EQUAL_FP32(FLT_MAX, s11);
11084  ASSERT_EQUAL_FP32(FLT_MIN, s12);
11085
11086  // Check that the NaN payload is preserved according to A64 conversion rules:
11087  //  - The sign bit is preserved.
11088  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
11089  //  - The remaining mantissa bits are copied until they run out.
11090  //  - The low-order bits that haven't already been assigned are set to 0.
11091  ASSERT_EQUAL_FP32(rawbits_to_float(0x7fc12345), s13);
11092  ASSERT_EQUAL_FP32(rawbits_to_float(0x7fc12345), s14);
11093
11094  TEARDOWN();
11095}
11096
11097
11098TEST(fcvt_half) {
11099  SETUP();
11100
11101  START();
11102  Label done;
11103  {
11104    // Check all exact conversions from half to float and back.
11105    Label ok, fail;
11106    __ Mov(w0, 0);
11107    for (int i = 0; i < 0xffff; i += 3) {
11108      if ((i & 0x7c00) == 0x7c00) continue;
11109      __ Mov(w1, i);
11110      __ Fmov(s1, w1);
11111      __ Fcvt(s2, h1);
11112      __ Fcvt(h2, s2);
11113      __ Fmov(w2, s2);
11114      __ Cmp(w1, w2);
11115      __ B(&fail, ne);
11116    }
11117    __ B(&ok);
11118    __ Bind(&fail);
11119    __ Mov(w0, 1);
11120    __ B(&done);
11121    __ Bind(&ok);
11122  }
11123  {
11124    // Check all exact conversions from half to double and back.
11125    Label ok, fail;
11126    for (int i = 0; i < 0xffff; i += 3) {
11127      if ((i & 0x7c00) == 0x7c00) continue;
11128      __ Mov(w1, i);
11129      __ Fmov(s1, w1);
11130      __ Fcvt(d2, h1);
11131      __ Fcvt(h2, d2);
11132      __ Mov(w2, v2.S(), 0);
11133      __ Cmp(w1, w2);
11134      __ B(&fail, ne);
11135    }
11136    __ B(&ok);
11137    __ Bind(&fail);
11138    __ Mov(w0, 2);
11139    __ Bind(&ok);
11140  }
11141  __ Bind(&done);
11142
11143  // Check some other interesting values.
11144  __ Fmov(s0, kFP32PositiveInfinity);
11145  __ Fmov(s1, kFP32NegativeInfinity);
11146  __ Fmov(s2, 65504);       // Max half precision.
11147  __ Fmov(s3, 6.10352e-5);  // Min positive normal.
11148  __ Fmov(s4, 6.09756e-5);  // Max subnormal.
11149  __ Fmov(s5, 5.96046e-8);  // Min positive subnormal.
11150  __ Fmov(s6, 5e-9);        // Not representable -> zero.
11151  __ Fmov(s7, -0.0);
11152  __ Fcvt(h0, s0);
11153  __ Fcvt(h1, s1);
11154  __ Fcvt(h2, s2);
11155  __ Fcvt(h3, s3);
11156  __ Fcvt(h4, s4);
11157  __ Fcvt(h5, s5);
11158  __ Fcvt(h6, s6);
11159  __ Fcvt(h7, s7);
11160
11161  __ Fmov(d20, kFP64PositiveInfinity);
11162  __ Fmov(d21, kFP64NegativeInfinity);
11163  __ Fmov(d22, 65504);       // Max half precision.
11164  __ Fmov(d23, 6.10352e-5);  // Min positive normal.
11165  __ Fmov(d24, 6.09756e-5);  // Max subnormal.
11166  __ Fmov(d25, 5.96046e-8);  // Min positive subnormal.
11167  __ Fmov(d26, 5e-9);        // Not representable -> zero.
11168  __ Fmov(d27, -0.0);
11169  __ Fcvt(h20, d20);
11170  __ Fcvt(h21, d21);
11171  __ Fcvt(h22, d22);
11172  __ Fcvt(h23, d23);
11173  __ Fcvt(h24, d24);
11174  __ Fcvt(h25, d25);
11175  __ Fcvt(h26, d26);
11176  __ Fcvt(h27, d27);
11177  END();
11178
11179  RUN();
11180
11181  ASSERT_EQUAL_32(0, w0);  // 1 => float failed, 2 => double failed.
11182  ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q0);
11183  ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q1);
11184  ASSERT_EQUAL_128(0, 0x7bff, q2);
11185  ASSERT_EQUAL_128(0, 0x0400, q3);
11186  ASSERT_EQUAL_128(0, 0x03ff, q4);
11187  ASSERT_EQUAL_128(0, 0x0001, q5);
11188  ASSERT_EQUAL_128(0, 0, q6);
11189  ASSERT_EQUAL_128(0, 0x8000, q7);
11190  ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q20);
11191  ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q21);
11192  ASSERT_EQUAL_128(0, 0x7bff, q22);
11193  ASSERT_EQUAL_128(0, 0x0400, q23);
11194  ASSERT_EQUAL_128(0, 0x03ff, q24);
11195  ASSERT_EQUAL_128(0, 0x0001, q25);
11196  ASSERT_EQUAL_128(0, 0, q26);
11197  ASSERT_EQUAL_128(0, 0x8000, q27);
11198  TEARDOWN();
11199}
11200
11201
11202TEST(fcvtas) {
11203  SETUP();
11204
11205  START();
11206  __ Fmov(s0, 1.0);
11207  __ Fmov(s1, 1.1);
11208  __ Fmov(s2, 2.5);
11209  __ Fmov(s3, -2.5);
11210  __ Fmov(s4, kFP32PositiveInfinity);
11211  __ Fmov(s5, kFP32NegativeInfinity);
11212  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
11213  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
11214  __ Fmov(d8, 1.0);
11215  __ Fmov(d9, 1.1);
11216  __ Fmov(d10, 2.5);
11217  __ Fmov(d11, -2.5);
11218  __ Fmov(d12, kFP64PositiveInfinity);
11219  __ Fmov(d13, kFP64NegativeInfinity);
11220  __ Fmov(d14, kWMaxInt - 1);
11221  __ Fmov(d15, kWMinInt + 1);
11222  __ Fmov(s17, 1.1);
11223  __ Fmov(s18, 2.5);
11224  __ Fmov(s19, -2.5);
11225  __ Fmov(s20, kFP32PositiveInfinity);
11226  __ Fmov(s21, kFP32NegativeInfinity);
11227  __ Fmov(s22, 0x7fffff8000000000);     // Largest float < INT64_MAX.
11228  __ Fneg(s23, s22);                    // Smallest float > INT64_MIN.
11229  __ Fmov(d24, 1.1);
11230  __ Fmov(d25, 2.5);
11231  __ Fmov(d26, -2.5);
11232  __ Fmov(d27, kFP64PositiveInfinity);
11233  __ Fmov(d28, kFP64NegativeInfinity);
11234  __ Fmov(d29, 0x7ffffffffffffc00);     // Largest double < INT64_MAX.
11235  __ Fneg(d30, d29);                    // Smallest double > INT64_MIN.
11236
11237  __ Fcvtas(w0, s0);
11238  __ Fcvtas(w1, s1);
11239  __ Fcvtas(w2, s2);
11240  __ Fcvtas(w3, s3);
11241  __ Fcvtas(w4, s4);
11242  __ Fcvtas(w5, s5);
11243  __ Fcvtas(w6, s6);
11244  __ Fcvtas(w7, s7);
11245  __ Fcvtas(w8, d8);
11246  __ Fcvtas(w9, d9);
11247  __ Fcvtas(w10, d10);
11248  __ Fcvtas(w11, d11);
11249  __ Fcvtas(w12, d12);
11250  __ Fcvtas(w13, d13);
11251  __ Fcvtas(w14, d14);
11252  __ Fcvtas(w15, d15);
11253  __ Fcvtas(x17, s17);
11254  __ Fcvtas(x18, s18);
11255  __ Fcvtas(x19, s19);
11256  __ Fcvtas(x20, s20);
11257  __ Fcvtas(x21, s21);
11258  __ Fcvtas(x22, s22);
11259  __ Fcvtas(x23, s23);
11260  __ Fcvtas(x24, d24);
11261  __ Fcvtas(x25, d25);
11262  __ Fcvtas(x26, d26);
11263  __ Fcvtas(x27, d27);
11264  __ Fcvtas(x28, d28);
11265  __ Fcvtas(x29, d29);
11266  __ Fcvtas(x30, d30);
11267  END();
11268
11269  RUN();
11270
11271  ASSERT_EQUAL_64(1, x0);
11272  ASSERT_EQUAL_64(1, x1);
11273  ASSERT_EQUAL_64(3, x2);
11274  ASSERT_EQUAL_64(0xfffffffd, x3);
11275  ASSERT_EQUAL_64(0x7fffffff, x4);
11276  ASSERT_EQUAL_64(0x80000000, x5);
11277  ASSERT_EQUAL_64(0x7fffff80, x6);
11278  ASSERT_EQUAL_64(0x80000080, x7);
11279  ASSERT_EQUAL_64(1, x8);
11280  ASSERT_EQUAL_64(1, x9);
11281  ASSERT_EQUAL_64(3, x10);
11282  ASSERT_EQUAL_64(0xfffffffd, x11);
11283  ASSERT_EQUAL_64(0x7fffffff, x12);
11284  ASSERT_EQUAL_64(0x80000000, x13);
11285  ASSERT_EQUAL_64(0x7ffffffe, x14);
11286  ASSERT_EQUAL_64(0x80000001, x15);
11287  ASSERT_EQUAL_64(1, x17);
11288  ASSERT_EQUAL_64(3, x18);
11289  ASSERT_EQUAL_64(0xfffffffffffffffd, x19);
11290  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
11291  ASSERT_EQUAL_64(0x8000000000000000, x21);
11292  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
11293  ASSERT_EQUAL_64(0x8000008000000000, x23);
11294  ASSERT_EQUAL_64(1, x24);
11295  ASSERT_EQUAL_64(3, x25);
11296  ASSERT_EQUAL_64(0xfffffffffffffffd, x26);
11297  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
11298  ASSERT_EQUAL_64(0x8000000000000000, x28);
11299  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
11300  ASSERT_EQUAL_64(0x8000000000000400, x30);
11301
11302  TEARDOWN();
11303}
11304
11305
11306TEST(fcvtau) {
11307  SETUP();
11308
11309  START();
11310  __ Fmov(s0, 1.0);
11311  __ Fmov(s1, 1.1);
11312  __ Fmov(s2, 2.5);
11313  __ Fmov(s3, -2.5);
11314  __ Fmov(s4, kFP32PositiveInfinity);
11315  __ Fmov(s5, kFP32NegativeInfinity);
11316  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
11317  __ Fmov(d8, 1.0);
11318  __ Fmov(d9, 1.1);
11319  __ Fmov(d10, 2.5);
11320  __ Fmov(d11, -2.5);
11321  __ Fmov(d12, kFP64PositiveInfinity);
11322  __ Fmov(d13, kFP64NegativeInfinity);
11323  __ Fmov(d14, 0xfffffffe);
11324  __ Fmov(s16, 1.0);
11325  __ Fmov(s17, 1.1);
11326  __ Fmov(s18, 2.5);
11327  __ Fmov(s19, -2.5);
11328  __ Fmov(s20, kFP32PositiveInfinity);
11329  __ Fmov(s21, kFP32NegativeInfinity);
11330  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
11331  __ Fmov(d24, 1.1);
11332  __ Fmov(d25, 2.5);
11333  __ Fmov(d26, -2.5);
11334  __ Fmov(d27, kFP64PositiveInfinity);
11335  __ Fmov(d28, kFP64NegativeInfinity);
11336  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
11337  __ Fmov(s30, 0x100000000);
11338
11339  __ Fcvtau(w0, s0);
11340  __ Fcvtau(w1, s1);
11341  __ Fcvtau(w2, s2);
11342  __ Fcvtau(w3, s3);
11343  __ Fcvtau(w4, s4);
11344  __ Fcvtau(w5, s5);
11345  __ Fcvtau(w6, s6);
11346  __ Fcvtau(w8, d8);
11347  __ Fcvtau(w9, d9);
11348  __ Fcvtau(w10, d10);
11349  __ Fcvtau(w11, d11);
11350  __ Fcvtau(w12, d12);
11351  __ Fcvtau(w13, d13);
11352  __ Fcvtau(w14, d14);
11353  __ Fcvtau(w15, d15);
11354  __ Fcvtau(x16, s16);
11355  __ Fcvtau(x17, s17);
11356  __ Fcvtau(x18, s18);
11357  __ Fcvtau(x19, s19);
11358  __ Fcvtau(x20, s20);
11359  __ Fcvtau(x21, s21);
11360  __ Fcvtau(x22, s22);
11361  __ Fcvtau(x24, d24);
11362  __ Fcvtau(x25, d25);
11363  __ Fcvtau(x26, d26);
11364  __ Fcvtau(x27, d27);
11365  __ Fcvtau(x28, d28);
11366  __ Fcvtau(x29, d29);
11367  __ Fcvtau(w30, s30);
11368  END();
11369
11370  RUN();
11371
11372  ASSERT_EQUAL_64(1, x0);
11373  ASSERT_EQUAL_64(1, x1);
11374  ASSERT_EQUAL_64(3, x2);
11375  ASSERT_EQUAL_64(0, x3);
11376  ASSERT_EQUAL_64(0xffffffff, x4);
11377  ASSERT_EQUAL_64(0, x5);
11378  ASSERT_EQUAL_64(0xffffff00, x6);
11379  ASSERT_EQUAL_64(1, x8);
11380  ASSERT_EQUAL_64(1, x9);
11381  ASSERT_EQUAL_64(3, x10);
11382  ASSERT_EQUAL_64(0, x11);
11383  ASSERT_EQUAL_64(0xffffffff, x12);
11384  ASSERT_EQUAL_64(0, x13);
11385  ASSERT_EQUAL_64(0xfffffffe, x14);
11386  ASSERT_EQUAL_64(1, x16);
11387  ASSERT_EQUAL_64(1, x17);
11388  ASSERT_EQUAL_64(3, x18);
11389  ASSERT_EQUAL_64(0, x19);
11390  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
11391  ASSERT_EQUAL_64(0, x21);
11392  ASSERT_EQUAL_64(0xffffff0000000000, x22);
11393  ASSERT_EQUAL_64(1, x24);
11394  ASSERT_EQUAL_64(3, x25);
11395  ASSERT_EQUAL_64(0, x26);
11396  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
11397  ASSERT_EQUAL_64(0, x28);
11398  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
11399  ASSERT_EQUAL_64(0xffffffff, x30);
11400
11401  TEARDOWN();
11402}
11403
11404
11405TEST(fcvtms) {
11406  SETUP();
11407
11408  START();
11409  __ Fmov(s0, 1.0);
11410  __ Fmov(s1, 1.1);
11411  __ Fmov(s2, 1.5);
11412  __ Fmov(s3, -1.5);
11413  __ Fmov(s4, kFP32PositiveInfinity);
11414  __ Fmov(s5, kFP32NegativeInfinity);
11415  __ Fmov(s6, 0x7fffff80);                    // Largest float < INT32_MAX.
11416  __ Fneg(s7, s6);                            // Smallest float > INT32_MIN.
11417  __ Fmov(d8, 1.0);
11418  __ Fmov(d9, 1.1);
11419  __ Fmov(d10, 1.5);
11420  __ Fmov(d11, -1.5);
11421  __ Fmov(d12, kFP64PositiveInfinity);
11422  __ Fmov(d13, kFP64NegativeInfinity);
11423  __ Fmov(d14, kWMaxInt - 1);
11424  __ Fmov(d15, kWMinInt + 1);
11425  __ Fmov(s17, 1.1);
11426  __ Fmov(s18, 1.5);
11427  __ Fmov(s19, -1.5);
11428  __ Fmov(s20, kFP32PositiveInfinity);
11429  __ Fmov(s21, kFP32NegativeInfinity);
11430  __ Fmov(s22, 0x7fffff8000000000);           // Largest float < INT64_MAX.
11431  __ Fneg(s23, s22);                          // Smallest float > INT64_MIN.
11432  __ Fmov(d24, 1.1);
11433  __ Fmov(d25, 1.5);
11434  __ Fmov(d26, -1.5);
11435  __ Fmov(d27, kFP64PositiveInfinity);
11436  __ Fmov(d28, kFP64NegativeInfinity);
11437  __ Fmov(d29, 0x7ffffffffffffc00);           // Largest double < INT64_MAX.
11438  __ Fneg(d30, d29);                          // Smallest double > INT64_MIN.
11439
11440  __ Fcvtms(w0, s0);
11441  __ Fcvtms(w1, s1);
11442  __ Fcvtms(w2, s2);
11443  __ Fcvtms(w3, s3);
11444  __ Fcvtms(w4, s4);
11445  __ Fcvtms(w5, s5);
11446  __ Fcvtms(w6, s6);
11447  __ Fcvtms(w7, s7);
11448  __ Fcvtms(w8, d8);
11449  __ Fcvtms(w9, d9);
11450  __ Fcvtms(w10, d10);
11451  __ Fcvtms(w11, d11);
11452  __ Fcvtms(w12, d12);
11453  __ Fcvtms(w13, d13);
11454  __ Fcvtms(w14, d14);
11455  __ Fcvtms(w15, d15);
11456  __ Fcvtms(x17, s17);
11457  __ Fcvtms(x18, s18);
11458  __ Fcvtms(x19, s19);
11459  __ Fcvtms(x20, s20);
11460  __ Fcvtms(x21, s21);
11461  __ Fcvtms(x22, s22);
11462  __ Fcvtms(x23, s23);
11463  __ Fcvtms(x24, d24);
11464  __ Fcvtms(x25, d25);
11465  __ Fcvtms(x26, d26);
11466  __ Fcvtms(x27, d27);
11467  __ Fcvtms(x28, d28);
11468  __ Fcvtms(x29, d29);
11469  __ Fcvtms(x30, d30);
11470  END();
11471
11472  RUN();
11473
11474  ASSERT_EQUAL_64(1, x0);
11475  ASSERT_EQUAL_64(1, x1);
11476  ASSERT_EQUAL_64(1, x2);
11477  ASSERT_EQUAL_64(0xfffffffe, x3);
11478  ASSERT_EQUAL_64(0x7fffffff, x4);
11479  ASSERT_EQUAL_64(0x80000000, x5);
11480  ASSERT_EQUAL_64(0x7fffff80, x6);
11481  ASSERT_EQUAL_64(0x80000080, x7);
11482  ASSERT_EQUAL_64(1, x8);
11483  ASSERT_EQUAL_64(1, x9);
11484  ASSERT_EQUAL_64(1, x10);
11485  ASSERT_EQUAL_64(0xfffffffe, x11);
11486  ASSERT_EQUAL_64(0x7fffffff, x12);
11487  ASSERT_EQUAL_64(0x80000000, x13);
11488  ASSERT_EQUAL_64(0x7ffffffe, x14);
11489  ASSERT_EQUAL_64(0x80000001, x15);
11490  ASSERT_EQUAL_64(1, x17);
11491  ASSERT_EQUAL_64(1, x18);
11492  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
11493  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
11494  ASSERT_EQUAL_64(0x8000000000000000, x21);
11495  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
11496  ASSERT_EQUAL_64(0x8000008000000000, x23);
11497  ASSERT_EQUAL_64(1, x24);
11498  ASSERT_EQUAL_64(1, x25);
11499  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
11500  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
11501  ASSERT_EQUAL_64(0x8000000000000000, x28);
11502  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
11503  ASSERT_EQUAL_64(0x8000000000000400, x30);
11504
11505  TEARDOWN();
11506}
11507
11508
11509TEST(fcvtmu) {
11510  SETUP();
11511
11512  START();
11513  __ Fmov(s0, 1.0);
11514  __ Fmov(s1, 1.1);
11515  __ Fmov(s2, 1.5);
11516  __ Fmov(s3, -1.5);
11517  __ Fmov(s4, kFP32PositiveInfinity);
11518  __ Fmov(s5, kFP32NegativeInfinity);
11519  __ Fmov(s6, 0x7fffff80);                    // Largest float < INT32_MAX.
11520  __ Fneg(s7, s6);                            // Smallest float > INT32_MIN.
11521  __ Fmov(d8, 1.0);
11522  __ Fmov(d9, 1.1);
11523  __ Fmov(d10, 1.5);
11524  __ Fmov(d11, -1.5);
11525  __ Fmov(d12, kFP64PositiveInfinity);
11526  __ Fmov(d13, kFP64NegativeInfinity);
11527  __ Fmov(d14, kWMaxInt - 1);
11528  __ Fmov(d15, kWMinInt + 1);
11529  __ Fmov(s17, 1.1);
11530  __ Fmov(s18, 1.5);
11531  __ Fmov(s19, -1.5);
11532  __ Fmov(s20, kFP32PositiveInfinity);
11533  __ Fmov(s21, kFP32NegativeInfinity);
11534  __ Fmov(s22, 0x7fffff8000000000);           // Largest float < INT64_MAX.
11535  __ Fneg(s23, s22);                          // Smallest float > INT64_MIN.
11536  __ Fmov(d24, 1.1);
11537  __ Fmov(d25, 1.5);
11538  __ Fmov(d26, -1.5);
11539  __ Fmov(d27, kFP64PositiveInfinity);
11540  __ Fmov(d28, kFP64NegativeInfinity);
11541  __ Fmov(d29, 0x7ffffffffffffc00);           // Largest double < INT64_MAX.
11542  __ Fneg(d30, d29);                          // Smallest double > INT64_MIN.
11543
11544  __ Fcvtmu(w0, s0);
11545  __ Fcvtmu(w1, s1);
11546  __ Fcvtmu(w2, s2);
11547  __ Fcvtmu(w3, s3);
11548  __ Fcvtmu(w4, s4);
11549  __ Fcvtmu(w5, s5);
11550  __ Fcvtmu(w6, s6);
11551  __ Fcvtmu(w7, s7);
11552  __ Fcvtmu(w8, d8);
11553  __ Fcvtmu(w9, d9);
11554  __ Fcvtmu(w10, d10);
11555  __ Fcvtmu(w11, d11);
11556  __ Fcvtmu(w12, d12);
11557  __ Fcvtmu(w13, d13);
11558  __ Fcvtmu(w14, d14);
11559  __ Fcvtmu(x17, s17);
11560  __ Fcvtmu(x18, s18);
11561  __ Fcvtmu(x19, s19);
11562  __ Fcvtmu(x20, s20);
11563  __ Fcvtmu(x21, s21);
11564  __ Fcvtmu(x22, s22);
11565  __ Fcvtmu(x23, s23);
11566  __ Fcvtmu(x24, d24);
11567  __ Fcvtmu(x25, d25);
11568  __ Fcvtmu(x26, d26);
11569  __ Fcvtmu(x27, d27);
11570  __ Fcvtmu(x28, d28);
11571  __ Fcvtmu(x29, d29);
11572  __ Fcvtmu(x30, d30);
11573  END();
11574
11575  RUN();
11576
11577  ASSERT_EQUAL_64(1, x0);
11578  ASSERT_EQUAL_64(1, x1);
11579  ASSERT_EQUAL_64(1, x2);
11580  ASSERT_EQUAL_64(0, x3);
11581  ASSERT_EQUAL_64(0xffffffff, x4);
11582  ASSERT_EQUAL_64(0, x5);
11583  ASSERT_EQUAL_64(0x7fffff80, x6);
11584  ASSERT_EQUAL_64(0, x7);
11585  ASSERT_EQUAL_64(1, x8);
11586  ASSERT_EQUAL_64(1, x9);
11587  ASSERT_EQUAL_64(1, x10);
11588  ASSERT_EQUAL_64(0, x11);
11589  ASSERT_EQUAL_64(0xffffffff, x12);
11590  ASSERT_EQUAL_64(0, x13);
11591  ASSERT_EQUAL_64(0x7ffffffe, x14);
11592  ASSERT_EQUAL_64(1, x17);
11593  ASSERT_EQUAL_64(1, x18);
11594  ASSERT_EQUAL_64(0, x19);
11595  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
11596  ASSERT_EQUAL_64(0, x21);
11597  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
11598  ASSERT_EQUAL_64(0, x23);
11599  ASSERT_EQUAL_64(1, x24);
11600  ASSERT_EQUAL_64(1, x25);
11601  ASSERT_EQUAL_64(0, x26);
11602  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
11603  ASSERT_EQUAL_64(0, x28);
11604  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
11605  ASSERT_EQUAL_64(0, x30);
11606
11607  TEARDOWN();
11608}
11609
11610
11611TEST(fcvtns) {
11612  SETUP();
11613
11614  START();
11615  __ Fmov(s0, 1.0);
11616  __ Fmov(s1, 1.1);
11617  __ Fmov(s2, 1.5);
11618  __ Fmov(s3, -1.5);
11619  __ Fmov(s4, kFP32PositiveInfinity);
11620  __ Fmov(s5, kFP32NegativeInfinity);
11621  __ Fmov(s6, 0x7fffff80);                    // Largest float < INT32_MAX.
11622  __ Fneg(s7, s6);                            // Smallest float > INT32_MIN.
11623  __ Fmov(d8, 1.0);
11624  __ Fmov(d9, 1.1);
11625  __ Fmov(d10, 1.5);
11626  __ Fmov(d11, -1.5);
11627  __ Fmov(d12, kFP64PositiveInfinity);
11628  __ Fmov(d13, kFP64NegativeInfinity);
11629  __ Fmov(d14, kWMaxInt - 1);
11630  __ Fmov(d15, kWMinInt + 1);
11631  __ Fmov(s17, 1.1);
11632  __ Fmov(s18, 1.5);
11633  __ Fmov(s19, -1.5);
11634  __ Fmov(s20, kFP32PositiveInfinity);
11635  __ Fmov(s21, kFP32NegativeInfinity);
11636  __ Fmov(s22, 0x7fffff8000000000);           // Largest float < INT64_MAX.
11637  __ Fneg(s23, s22);                          // Smallest float > INT64_MIN.
11638  __ Fmov(d24, 1.1);
11639  __ Fmov(d25, 1.5);
11640  __ Fmov(d26, -1.5);
11641  __ Fmov(d27, kFP64PositiveInfinity);
11642  __ Fmov(d28, kFP64NegativeInfinity);
11643  __ Fmov(d29, 0x7ffffffffffffc00);           // Largest double < INT64_MAX.
11644  __ Fneg(d30, d29);                          // Smallest double > INT64_MIN.
11645
11646  __ Fcvtns(w0, s0);
11647  __ Fcvtns(w1, s1);
11648  __ Fcvtns(w2, s2);
11649  __ Fcvtns(w3, s3);
11650  __ Fcvtns(w4, s4);
11651  __ Fcvtns(w5, s5);
11652  __ Fcvtns(w6, s6);
11653  __ Fcvtns(w7, s7);
11654  __ Fcvtns(w8, d8);
11655  __ Fcvtns(w9, d9);
11656  __ Fcvtns(w10, d10);
11657  __ Fcvtns(w11, d11);
11658  __ Fcvtns(w12, d12);
11659  __ Fcvtns(w13, d13);
11660  __ Fcvtns(w14, d14);
11661  __ Fcvtns(w15, d15);
11662  __ Fcvtns(x17, s17);
11663  __ Fcvtns(x18, s18);
11664  __ Fcvtns(x19, s19);
11665  __ Fcvtns(x20, s20);
11666  __ Fcvtns(x21, s21);
11667  __ Fcvtns(x22, s22);
11668  __ Fcvtns(x23, s23);
11669  __ Fcvtns(x24, d24);
11670  __ Fcvtns(x25, d25);
11671  __ Fcvtns(x26, d26);
11672  __ Fcvtns(x27, d27);
11673  __ Fcvtns(x28, d28);
11674  __ Fcvtns(x29, d29);
11675  __ Fcvtns(x30, d30);
11676  END();
11677
11678  RUN();
11679
11680  ASSERT_EQUAL_64(1, x0);
11681  ASSERT_EQUAL_64(1, x1);
11682  ASSERT_EQUAL_64(2, x2);
11683  ASSERT_EQUAL_64(0xfffffffe, x3);
11684  ASSERT_EQUAL_64(0x7fffffff, x4);
11685  ASSERT_EQUAL_64(0x80000000, x5);
11686  ASSERT_EQUAL_64(0x7fffff80, x6);
11687  ASSERT_EQUAL_64(0x80000080, x7);
11688  ASSERT_EQUAL_64(1, x8);
11689  ASSERT_EQUAL_64(1, x9);
11690  ASSERT_EQUAL_64(2, x10);
11691  ASSERT_EQUAL_64(0xfffffffe, x11);
11692  ASSERT_EQUAL_64(0x7fffffff, x12);
11693  ASSERT_EQUAL_64(0x80000000, x13);
11694  ASSERT_EQUAL_64(0x7ffffffe, x14);
11695  ASSERT_EQUAL_64(0x80000001, x15);
11696  ASSERT_EQUAL_64(1, x17);
11697  ASSERT_EQUAL_64(2, x18);
11698  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
11699  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
11700  ASSERT_EQUAL_64(0x8000000000000000, x21);
11701  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
11702  ASSERT_EQUAL_64(0x8000008000000000, x23);
11703  ASSERT_EQUAL_64(1, x24);
11704  ASSERT_EQUAL_64(2, x25);
11705  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
11706  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
11707  ASSERT_EQUAL_64(0x8000000000000000, x28);
11708  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
11709  ASSERT_EQUAL_64(0x8000000000000400, x30);
11710
11711  TEARDOWN();
11712}
11713
11714
11715TEST(fcvtnu) {
11716  SETUP();
11717
11718  START();
11719  __ Fmov(s0, 1.0);
11720  __ Fmov(s1, 1.1);
11721  __ Fmov(s2, 1.5);
11722  __ Fmov(s3, -1.5);
11723  __ Fmov(s4, kFP32PositiveInfinity);
11724  __ Fmov(s5, kFP32NegativeInfinity);
11725  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
11726  __ Fmov(d8, 1.0);
11727  __ Fmov(d9, 1.1);
11728  __ Fmov(d10, 1.5);
11729  __ Fmov(d11, -1.5);
11730  __ Fmov(d12, kFP64PositiveInfinity);
11731  __ Fmov(d13, kFP64NegativeInfinity);
11732  __ Fmov(d14, 0xfffffffe);
11733  __ Fmov(s16, 1.0);
11734  __ Fmov(s17, 1.1);
11735  __ Fmov(s18, 1.5);
11736  __ Fmov(s19, -1.5);
11737  __ Fmov(s20, kFP32PositiveInfinity);
11738  __ Fmov(s21, kFP32NegativeInfinity);
11739  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
11740  __ Fmov(d24, 1.1);
11741  __ Fmov(d25, 1.5);
11742  __ Fmov(d26, -1.5);
11743  __ Fmov(d27, kFP64PositiveInfinity);
11744  __ Fmov(d28, kFP64NegativeInfinity);
11745  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
11746  __ Fmov(s30, 0x100000000);
11747
11748  __ Fcvtnu(w0, s0);
11749  __ Fcvtnu(w1, s1);
11750  __ Fcvtnu(w2, s2);
11751  __ Fcvtnu(w3, s3);
11752  __ Fcvtnu(w4, s4);
11753  __ Fcvtnu(w5, s5);
11754  __ Fcvtnu(w6, s6);
11755  __ Fcvtnu(w8, d8);
11756  __ Fcvtnu(w9, d9);
11757  __ Fcvtnu(w10, d10);
11758  __ Fcvtnu(w11, d11);
11759  __ Fcvtnu(w12, d12);
11760  __ Fcvtnu(w13, d13);
11761  __ Fcvtnu(w14, d14);
11762  __ Fcvtnu(w15, d15);
11763  __ Fcvtnu(x16, s16);
11764  __ Fcvtnu(x17, s17);
11765  __ Fcvtnu(x18, s18);
11766  __ Fcvtnu(x19, s19);
11767  __ Fcvtnu(x20, s20);
11768  __ Fcvtnu(x21, s21);
11769  __ Fcvtnu(x22, s22);
11770  __ Fcvtnu(x24, d24);
11771  __ Fcvtnu(x25, d25);
11772  __ Fcvtnu(x26, d26);
11773  __ Fcvtnu(x27, d27);
11774  __ Fcvtnu(x28, d28);
11775  __ Fcvtnu(x29, d29);
11776  __ Fcvtnu(w30, s30);
11777  END();
11778
11779  RUN();
11780
11781  ASSERT_EQUAL_64(1, x0);
11782  ASSERT_EQUAL_64(1, x1);
11783  ASSERT_EQUAL_64(2, x2);
11784  ASSERT_EQUAL_64(0, x3);
11785  ASSERT_EQUAL_64(0xffffffff, x4);
11786  ASSERT_EQUAL_64(0, x5);
11787  ASSERT_EQUAL_64(0xffffff00, x6);
11788  ASSERT_EQUAL_64(1, x8);
11789  ASSERT_EQUAL_64(1, x9);
11790  ASSERT_EQUAL_64(2, x10);
11791  ASSERT_EQUAL_64(0, x11);
11792  ASSERT_EQUAL_64(0xffffffff, x12);
11793  ASSERT_EQUAL_64(0, x13);
11794  ASSERT_EQUAL_64(0xfffffffe, x14);
11795  ASSERT_EQUAL_64(1, x16);
11796  ASSERT_EQUAL_64(1, x17);
11797  ASSERT_EQUAL_64(2, x18);
11798  ASSERT_EQUAL_64(0, x19);
11799  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
11800  ASSERT_EQUAL_64(0, x21);
11801  ASSERT_EQUAL_64(0xffffff0000000000, x22);
11802  ASSERT_EQUAL_64(1, x24);
11803  ASSERT_EQUAL_64(2, x25);
11804  ASSERT_EQUAL_64(0, x26);
11805  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
11806  ASSERT_EQUAL_64(0, x28);
11807  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
11808  ASSERT_EQUAL_64(0xffffffff, x30);
11809
11810  TEARDOWN();
11811}
11812
11813
11814TEST(fcvtzs) {
11815  SETUP();
11816
11817  START();
11818  __ Fmov(s0, 1.0);
11819  __ Fmov(s1, 1.1);
11820  __ Fmov(s2, 1.5);
11821  __ Fmov(s3, -1.5);
11822  __ Fmov(s4, kFP32PositiveInfinity);
11823  __ Fmov(s5, kFP32NegativeInfinity);
11824  __ Fmov(s6, 0x7fffff80);                    // Largest float < INT32_MAX.
11825  __ Fneg(s7, s6);                            // Smallest float > INT32_MIN.
11826  __ Fmov(d8, 1.0);
11827  __ Fmov(d9, 1.1);
11828  __ Fmov(d10, 1.5);
11829  __ Fmov(d11, -1.5);
11830  __ Fmov(d12, kFP64PositiveInfinity);
11831  __ Fmov(d13, kFP64NegativeInfinity);
11832  __ Fmov(d14, kWMaxInt - 1);
11833  __ Fmov(d15, kWMinInt + 1);
11834  __ Fmov(s17, 1.1);
11835  __ Fmov(s18, 1.5);
11836  __ Fmov(s19, -1.5);
11837  __ Fmov(s20, kFP32PositiveInfinity);
11838  __ Fmov(s21, kFP32NegativeInfinity);
11839  __ Fmov(s22, 0x7fffff8000000000);           // Largest float < INT64_MAX.
11840  __ Fneg(s23, s22);                          // Smallest float > INT64_MIN.
11841  __ Fmov(d24, 1.1);
11842  __ Fmov(d25, 1.5);
11843  __ Fmov(d26, -1.5);
11844  __ Fmov(d27, kFP64PositiveInfinity);
11845  __ Fmov(d28, kFP64NegativeInfinity);
11846  __ Fmov(d29, 0x7ffffffffffffc00);           // Largest double < INT64_MAX.
11847  __ Fneg(d30, d29);                          // Smallest double > INT64_MIN.
11848
11849  __ Fcvtzs(w0, s0);
11850  __ Fcvtzs(w1, s1);
11851  __ Fcvtzs(w2, s2);
11852  __ Fcvtzs(w3, s3);
11853  __ Fcvtzs(w4, s4);
11854  __ Fcvtzs(w5, s5);
11855  __ Fcvtzs(w6, s6);
11856  __ Fcvtzs(w7, s7);
11857  __ Fcvtzs(w8, d8);
11858  __ Fcvtzs(w9, d9);
11859  __ Fcvtzs(w10, d10);
11860  __ Fcvtzs(w11, d11);
11861  __ Fcvtzs(w12, d12);
11862  __ Fcvtzs(w13, d13);
11863  __ Fcvtzs(w14, d14);
11864  __ Fcvtzs(w15, d15);
11865  __ Fcvtzs(x17, s17);
11866  __ Fcvtzs(x18, s18);
11867  __ Fcvtzs(x19, s19);
11868  __ Fcvtzs(x20, s20);
11869  __ Fcvtzs(x21, s21);
11870  __ Fcvtzs(x22, s22);
11871  __ Fcvtzs(x23, s23);
11872  __ Fcvtzs(x24, d24);
11873  __ Fcvtzs(x25, d25);
11874  __ Fcvtzs(x26, d26);
11875  __ Fcvtzs(x27, d27);
11876  __ Fcvtzs(x28, d28);
11877  __ Fcvtzs(x29, d29);
11878  __ Fcvtzs(x30, d30);
11879  END();
11880
11881  RUN();
11882
11883  ASSERT_EQUAL_64(1, x0);
11884  ASSERT_EQUAL_64(1, x1);
11885  ASSERT_EQUAL_64(1, x2);
11886  ASSERT_EQUAL_64(0xffffffff, x3);
11887  ASSERT_EQUAL_64(0x7fffffff, x4);
11888  ASSERT_EQUAL_64(0x80000000, x5);
11889  ASSERT_EQUAL_64(0x7fffff80, x6);
11890  ASSERT_EQUAL_64(0x80000080, x7);
11891  ASSERT_EQUAL_64(1, x8);
11892  ASSERT_EQUAL_64(1, x9);
11893  ASSERT_EQUAL_64(1, x10);
11894  ASSERT_EQUAL_64(0xffffffff, x11);
11895  ASSERT_EQUAL_64(0x7fffffff, x12);
11896  ASSERT_EQUAL_64(0x80000000, x13);
11897  ASSERT_EQUAL_64(0x7ffffffe, x14);
11898  ASSERT_EQUAL_64(0x80000001, x15);
11899  ASSERT_EQUAL_64(1, x17);
11900  ASSERT_EQUAL_64(1, x18);
11901  ASSERT_EQUAL_64(0xffffffffffffffff, x19);
11902  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
11903  ASSERT_EQUAL_64(0x8000000000000000, x21);
11904  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
11905  ASSERT_EQUAL_64(0x8000008000000000, x23);
11906  ASSERT_EQUAL_64(1, x24);
11907  ASSERT_EQUAL_64(1, x25);
11908  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
11909  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
11910  ASSERT_EQUAL_64(0x8000000000000000, x28);
11911  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
11912  ASSERT_EQUAL_64(0x8000000000000400, x30);
11913
11914  TEARDOWN();
11915}
11916
11917TEST(fcvtzu) {
11918  SETUP();
11919
11920  START();
11921  __ Fmov(s0, 1.0);
11922  __ Fmov(s1, 1.1);
11923  __ Fmov(s2, 1.5);
11924  __ Fmov(s3, -1.5);
11925  __ Fmov(s4, kFP32PositiveInfinity);
11926  __ Fmov(s5, kFP32NegativeInfinity);
11927  __ Fmov(s6, 0x7fffff80);                    // Largest float < INT32_MAX.
11928  __ Fneg(s7, s6);                            // Smallest float > INT32_MIN.
11929  __ Fmov(d8, 1.0);
11930  __ Fmov(d9, 1.1);
11931  __ Fmov(d10, 1.5);
11932  __ Fmov(d11, -1.5);
11933  __ Fmov(d12, kFP64PositiveInfinity);
11934  __ Fmov(d13, kFP64NegativeInfinity);
11935  __ Fmov(d14, kWMaxInt - 1);
11936  __ Fmov(d15, kWMinInt + 1);
11937  __ Fmov(s17, 1.1);
11938  __ Fmov(s18, 1.5);
11939  __ Fmov(s19, -1.5);
11940  __ Fmov(s20, kFP32PositiveInfinity);
11941  __ Fmov(s21, kFP32NegativeInfinity);
11942  __ Fmov(s22, 0x7fffff8000000000);           // Largest float < INT64_MAX.
11943  __ Fneg(s23, s22);                          // Smallest float > INT64_MIN.
11944  __ Fmov(d24, 1.1);
11945  __ Fmov(d25, 1.5);
11946  __ Fmov(d26, -1.5);
11947  __ Fmov(d27, kFP64PositiveInfinity);
11948  __ Fmov(d28, kFP64NegativeInfinity);
11949  __ Fmov(d29, 0x7ffffffffffffc00);           // Largest double < INT64_MAX.
11950  __ Fneg(d30, d29);                          // Smallest double > INT64_MIN.
11951
11952  __ Fcvtzu(w0, s0);
11953  __ Fcvtzu(w1, s1);
11954  __ Fcvtzu(w2, s2);
11955  __ Fcvtzu(w3, s3);
11956  __ Fcvtzu(w4, s4);
11957  __ Fcvtzu(w5, s5);
11958  __ Fcvtzu(w6, s6);
11959  __ Fcvtzu(w7, s7);
11960  __ Fcvtzu(w8, d8);
11961  __ Fcvtzu(w9, d9);
11962  __ Fcvtzu(w10, d10);
11963  __ Fcvtzu(w11, d11);
11964  __ Fcvtzu(w12, d12);
11965  __ Fcvtzu(w13, d13);
11966  __ Fcvtzu(w14, d14);
11967  __ Fcvtzu(x17, s17);
11968  __ Fcvtzu(x18, s18);
11969  __ Fcvtzu(x19, s19);
11970  __ Fcvtzu(x20, s20);
11971  __ Fcvtzu(x21, s21);
11972  __ Fcvtzu(x22, s22);
11973  __ Fcvtzu(x23, s23);
11974  __ Fcvtzu(x24, d24);
11975  __ Fcvtzu(x25, d25);
11976  __ Fcvtzu(x26, d26);
11977  __ Fcvtzu(x27, d27);
11978  __ Fcvtzu(x28, d28);
11979  __ Fcvtzu(x29, d29);
11980  __ Fcvtzu(x30, d30);
11981  END();
11982
11983  RUN();
11984
11985  ASSERT_EQUAL_64(1, x0);
11986  ASSERT_EQUAL_64(1, x1);
11987  ASSERT_EQUAL_64(1, x2);
11988  ASSERT_EQUAL_64(0, x3);
11989  ASSERT_EQUAL_64(0xffffffff, x4);
11990  ASSERT_EQUAL_64(0, x5);
11991  ASSERT_EQUAL_64(0x7fffff80, x6);
11992  ASSERT_EQUAL_64(0, x7);
11993  ASSERT_EQUAL_64(1, x8);
11994  ASSERT_EQUAL_64(1, x9);
11995  ASSERT_EQUAL_64(1, x10);
11996  ASSERT_EQUAL_64(0, x11);
11997  ASSERT_EQUAL_64(0xffffffff, x12);
11998  ASSERT_EQUAL_64(0, x13);
11999  ASSERT_EQUAL_64(0x7ffffffe, x14);
12000  ASSERT_EQUAL_64(1, x17);
12001  ASSERT_EQUAL_64(1, x18);
12002  ASSERT_EQUAL_64(0, x19);
12003  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
12004  ASSERT_EQUAL_64(0, x21);
12005  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
12006  ASSERT_EQUAL_64(0, x23);
12007  ASSERT_EQUAL_64(1, x24);
12008  ASSERT_EQUAL_64(1, x25);
12009  ASSERT_EQUAL_64(0, x26);
12010  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
12011  ASSERT_EQUAL_64(0, x28);
12012  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
12013  ASSERT_EQUAL_64(0, x30);
12014
12015  TEARDOWN();
12016}
12017
12018
12019TEST(neon_fcvtl) {
12020  SETUP();
12021
12022  START();
12023
12024  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
12025  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
12026  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
12027  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
12028  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
12029  __ Fcvtl(v16.V4S(), v0.V4H());
12030  __ Fcvtl2(v17.V4S(), v0.V8H());
12031  __ Fcvtl(v18.V4S(), v1.V4H());
12032  __ Fcvtl2(v19.V4S(), v1.V8H());
12033
12034  __ Fcvtl(v20.V2D(), v2.V2S());
12035  __ Fcvtl2(v21.V2D(), v2.V4S());
12036  __ Fcvtl(v22.V2D(), v3.V2S());
12037  __ Fcvtl2(v23.V2D(), v3.V4S());
12038  __ Fcvtl(v24.V2D(), v4.V2S());
12039  __ Fcvtl2(v25.V2D(), v4.V4S());
12040
12041  END();
12042
12043  RUN();
12044  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
12045  ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
12046  ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
12047  ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
12048  ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
12049  ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
12050  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
12051  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
12052  ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
12053  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
12054  TEARDOWN();
12055}
12056
12057
12058TEST(neon_fcvtn) {
12059  SETUP();
12060
12061  START();
12062
12063  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
12064  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
12065  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
12066  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
12067  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
12068  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
12069  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
12070  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
12071  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
12072
12073  __ Fcvtn(v16.V4H(), v0.V4S());
12074  __ Fcvtn2(v16.V8H(), v1.V4S());
12075  __ Fcvtn(v17.V4H(), v2.V4S());
12076  __ Fcvtn(v18.V2S(), v3.V2D());
12077  __ Fcvtn2(v18.V4S(), v4.V2D());
12078  __ Fcvtn(v19.V2S(), v5.V2D());
12079  __ Fcvtn2(v19.V4S(), v6.V2D());
12080  __ Fcvtn(v20.V2S(), v7.V2D());
12081  __ Fcvtn2(v20.V4S(), v8.V2D());
12082  END();
12083
12084  RUN();
12085  ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
12086  ASSERT_EQUAL_128(0, 0x7e7ffe7f00008000, q17);
12087  ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
12088  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
12089  ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
12090  TEARDOWN();
12091}
12092
12093
12094TEST(neon_fcvtxn) {
12095  SETUP();
12096
12097  START();
12098  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
12099  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
12100  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
12101  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
12102  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
12103  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
12104  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
12105  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
12106  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
12107  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
12108  __ Fcvtxn(v16.V2S(), v0.V2D());
12109  __ Fcvtxn2(v16.V4S(), v1.V2D());
12110  __ Fcvtxn(v17.V2S(), v2.V2D());
12111  __ Fcvtxn2(v17.V4S(), v3.V2D());
12112  __ Fcvtxn(v18.V2S(), v4.V2D());
12113  __ Fcvtxn2(v18.V4S(), v5.V2D());
12114  __ Fcvtxn(v19.V2S(), v6.V2D());
12115  __ Fcvtxn2(v19.V4S(), v7.V2D());
12116  __ Fcvtxn(v20.V2S(), v8.V2D());
12117  __ Fcvtxn2(v20.V4S(), v9.V2D());
12118  __ Fcvtxn(s21, d0);
12119  END();
12120
12121  RUN();
12122  ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
12123  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
12124  ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
12125  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
12126  ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
12127  ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
12128  TEARDOWN();
12129}
12130
12131
12132// Test that scvtf and ucvtf can convert the 64-bit input into the expected
12133// value. All possible values of 'fbits' are tested. The expected value is
12134// modified accordingly in each case.
12135//
12136// The expected value is specified as the bit encoding of the expected double
12137// produced by scvtf (expected_scvtf_bits) as well as ucvtf
12138// (expected_ucvtf_bits).
12139//
12140// Where the input value is representable by int32_t or uint32_t, conversions
12141// from W registers will also be tested.
12142static void TestUScvtfHelper(uint64_t in,
12143                             uint64_t expected_scvtf_bits,
12144                             uint64_t expected_ucvtf_bits) {
12145  uint64_t u64 = in;
12146  uint32_t u32 = u64 & 0xffffffff;
12147  int64_t s64 = static_cast<int64_t>(in);
12148  int32_t s32 = s64 & 0x7fffffff;
12149
12150  bool cvtf_s32 = (s64 == s32);
12151  bool cvtf_u32 = (u64 == u32);
12152
12153  double results_scvtf_x[65];
12154  double results_ucvtf_x[65];
12155  double results_scvtf_w[33];
12156  double results_ucvtf_w[33];
12157
12158  SETUP();
12159  START();
12160
12161  __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
12162  __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
12163  __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
12164  __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));
12165
12166  __ Mov(x10, s64);
12167
12168  // Corrupt the top word, in case it is accidentally used during W-register
12169  // conversions.
12170  __ Mov(x11, 0x5555555555555555);
12171  __ Bfi(x11, x10, 0, kWRegSize);
12172
12173  // Test integer conversions.
12174  __ Scvtf(d0, x10);
12175  __ Ucvtf(d1, x10);
12176  __ Scvtf(d2, w11);
12177  __ Ucvtf(d3, w11);
12178  __ Str(d0, MemOperand(x0));
12179  __ Str(d1, MemOperand(x1));
12180  __ Str(d2, MemOperand(x2));
12181  __ Str(d3, MemOperand(x3));
12182
12183  // Test all possible values of fbits.
12184  for (int fbits = 1; fbits <= 32; fbits++) {
12185    __ Scvtf(d0, x10, fbits);
12186    __ Ucvtf(d1, x10, fbits);
12187    __ Scvtf(d2, w11, fbits);
12188    __ Ucvtf(d3, w11, fbits);
12189    __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
12190    __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
12191    __ Str(d2, MemOperand(x2, fbits * kDRegSizeInBytes));
12192    __ Str(d3, MemOperand(x3, fbits * kDRegSizeInBytes));
12193  }
12194
12195  // Conversions from W registers can only handle fbits values <= 32, so just
12196  // test conversions from X registers for 32 < fbits <= 64.
12197  for (int fbits = 33; fbits <= 64; fbits++) {
12198    __ Scvtf(d0, x10, fbits);
12199    __ Ucvtf(d1, x10, fbits);
12200    __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
12201    __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
12202  }
12203
12204  END();
12205  RUN();
12206
12207  // Check the results.
12208  double expected_scvtf_base = rawbits_to_double(expected_scvtf_bits);
12209  double expected_ucvtf_base = rawbits_to_double(expected_ucvtf_bits);
12210
12211  for (int fbits = 0; fbits <= 32; fbits++) {
12212    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
12213    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
12214    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
12215    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
12216    if (cvtf_s32) ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_w[fbits]);
12217    if (cvtf_u32) ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_w[fbits]);
12218  }
12219  for (int fbits = 33; fbits <= 64; fbits++) {
12220    double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
12221    double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
12222    ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
12223    ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
12224  }
12225
12226  TEARDOWN();
12227}
12228
12229
12230TEST(scvtf_ucvtf_double) {
12231  // Simple conversions of positive numbers which require no rounding; the
12232  // results should not depened on the rounding mode, and ucvtf and scvtf should
12233  // produce the same result.
12234  TestUScvtfHelper(0x0000000000000000, 0x0000000000000000, 0x0000000000000000);
12235  TestUScvtfHelper(0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000);
12236  TestUScvtfHelper(0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000);
12237  TestUScvtfHelper(0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000);
12238  TestUScvtfHelper(0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000);
12239  // Test mantissa extremities.
12240  TestUScvtfHelper(0x4000000000000400, 0x43d0000000000001, 0x43d0000000000001);
12241  // The largest int32_t that fits in a double.
12242  TestUScvtfHelper(0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000);
12243  // Values that would be negative if treated as an int32_t.
12244  TestUScvtfHelper(0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000);
12245  TestUScvtfHelper(0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000);
12246  TestUScvtfHelper(0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000);
12247  // The largest int64_t that fits in a double.
12248  TestUScvtfHelper(0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff);
12249  // Check for bit pattern reproduction.
12250  TestUScvtfHelper(0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde);
12251  TestUScvtfHelper(0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000);
12252
12253  // Simple conversions of negative int64_t values. These require no rounding,
12254  // and the results should not depend on the rounding mode.
12255  TestUScvtfHelper(0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000);
12256  TestUScvtfHelper(0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000);
12257  TestUScvtfHelper(0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000);
12258
12259  // Conversions which require rounding.
12260  TestUScvtfHelper(0x1000000000000000, 0x43b0000000000000, 0x43b0000000000000);
12261  TestUScvtfHelper(0x1000000000000001, 0x43b0000000000000, 0x43b0000000000000);
12262  TestUScvtfHelper(0x1000000000000080, 0x43b0000000000000, 0x43b0000000000000);
12263  TestUScvtfHelper(0x1000000000000081, 0x43b0000000000001, 0x43b0000000000001);
12264  TestUScvtfHelper(0x1000000000000100, 0x43b0000000000001, 0x43b0000000000001);
12265  TestUScvtfHelper(0x1000000000000101, 0x43b0000000000001, 0x43b0000000000001);
12266  TestUScvtfHelper(0x1000000000000180, 0x43b0000000000002, 0x43b0000000000002);
12267  TestUScvtfHelper(0x1000000000000181, 0x43b0000000000002, 0x43b0000000000002);
12268  TestUScvtfHelper(0x1000000000000200, 0x43b0000000000002, 0x43b0000000000002);
12269  TestUScvtfHelper(0x1000000000000201, 0x43b0000000000002, 0x43b0000000000002);
12270  TestUScvtfHelper(0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002);
12271  TestUScvtfHelper(0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003);
12272  TestUScvtfHelper(0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003);
12273  // Check rounding of negative int64_t values (and large uint64_t values).
12274  TestUScvtfHelper(0x8000000000000000, 0xc3e0000000000000, 0x43e0000000000000);
12275  TestUScvtfHelper(0x8000000000000001, 0xc3e0000000000000, 0x43e0000000000000);
12276  TestUScvtfHelper(0x8000000000000200, 0xc3e0000000000000, 0x43e0000000000000);
12277  TestUScvtfHelper(0x8000000000000201, 0xc3dfffffffffffff, 0x43e0000000000000);
12278  TestUScvtfHelper(0x8000000000000400, 0xc3dfffffffffffff, 0x43e0000000000000);
12279  TestUScvtfHelper(0x8000000000000401, 0xc3dfffffffffffff, 0x43e0000000000001);
12280  TestUScvtfHelper(0x8000000000000600, 0xc3dffffffffffffe, 0x43e0000000000001);
12281  TestUScvtfHelper(0x8000000000000601, 0xc3dffffffffffffe, 0x43e0000000000001);
12282  TestUScvtfHelper(0x8000000000000800, 0xc3dffffffffffffe, 0x43e0000000000001);
12283  TestUScvtfHelper(0x8000000000000801, 0xc3dffffffffffffe, 0x43e0000000000001);
12284  TestUScvtfHelper(0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001);
12285  TestUScvtfHelper(0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001);
12286  TestUScvtfHelper(0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002);
12287  // Round up to produce a result that's too big for the input to represent.
12288  TestUScvtfHelper(0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000);
12289  TestUScvtfHelper(0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000);
12290  TestUScvtfHelper(0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000);
12291  TestUScvtfHelper(0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000);
12292}
12293
12294
12295// The same as TestUScvtfHelper, but convert to floats.
12296static void TestUScvtf32Helper(uint64_t in,
12297                               uint32_t expected_scvtf_bits,
12298                               uint32_t expected_ucvtf_bits) {
12299  uint64_t u64 = in;
12300  uint32_t u32 = u64 & 0xffffffff;
12301  int64_t s64 = static_cast<int64_t>(in);
12302  int32_t s32 = s64 & 0x7fffffff;
12303
12304  bool cvtf_s32 = (s64 == s32);
12305  bool cvtf_u32 = (u64 == u32);
12306
12307  float results_scvtf_x[65];
12308  float results_ucvtf_x[65];
12309  float results_scvtf_w[33];
12310  float results_ucvtf_w[33];
12311
12312  SETUP();
12313  START();
12314
12315  __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
12316  __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
12317  __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
12318  __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));
12319
12320  __ Mov(x10, s64);
12321
12322  // Corrupt the top word, in case it is accidentally used during W-register
12323  // conversions.
12324  __ Mov(x11, 0x5555555555555555);
12325  __ Bfi(x11, x10, 0, kWRegSize);
12326
12327  // Test integer conversions.
12328  __ Scvtf(s0, x10);
12329  __ Ucvtf(s1, x10);
12330  __ Scvtf(s2, w11);
12331  __ Ucvtf(s3, w11);
12332  __ Str(s0, MemOperand(x0));
12333  __ Str(s1, MemOperand(x1));
12334  __ Str(s2, MemOperand(x2));
12335  __ Str(s3, MemOperand(x3));
12336
12337  // Test all possible values of fbits.
12338  for (int fbits = 1; fbits <= 32; fbits++) {
12339    __ Scvtf(s0, x10, fbits);
12340    __ Ucvtf(s1, x10, fbits);
12341    __ Scvtf(s2, w11, fbits);
12342    __ Ucvtf(s3, w11, fbits);
12343    __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
12344    __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
12345    __ Str(s2, MemOperand(x2, fbits * kSRegSizeInBytes));
12346    __ Str(s3, MemOperand(x3, fbits * kSRegSizeInBytes));
12347  }
12348
12349  // Conversions from W registers can only handle fbits values <= 32, so just
12350  // test conversions from X registers for 32 < fbits <= 64.
12351  for (int fbits = 33; fbits <= 64; fbits++) {
12352    __ Scvtf(s0, x10, fbits);
12353    __ Ucvtf(s1, x10, fbits);
12354    __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
12355    __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
12356  }
12357
12358  END();
12359  RUN();
12360
12361  // Check the results.
12362  float expected_scvtf_base = rawbits_to_float(expected_scvtf_bits);
12363  float expected_ucvtf_base = rawbits_to_float(expected_ucvtf_bits);
12364
12365  for (int fbits = 0; fbits <= 32; fbits++) {
12366    float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
12367    float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
12368    ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
12369    ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
12370    if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]);
12371    if (cvtf_u32) ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_w[fbits]);
12372  }
12373  for (int fbits = 33; fbits <= 64; fbits++) {
12374    float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
12375    float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
12376    ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
12377    ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
12378  }
12379
12380  TEARDOWN();
12381}
12382
12383
12384TEST(scvtf_ucvtf_float) {
12385  // Simple conversions of positive numbers which require no rounding; the
12386  // results should not depened on the rounding mode, and ucvtf and scvtf should
12387  // produce the same result.
12388  TestUScvtf32Helper(0x0000000000000000, 0x00000000, 0x00000000);
12389  TestUScvtf32Helper(0x0000000000000001, 0x3f800000, 0x3f800000);
12390  TestUScvtf32Helper(0x0000000040000000, 0x4e800000, 0x4e800000);
12391  TestUScvtf32Helper(0x0000000100000000, 0x4f800000, 0x4f800000);
12392  TestUScvtf32Helper(0x4000000000000000, 0x5e800000, 0x5e800000);
12393  // Test mantissa extremities.
12394  TestUScvtf32Helper(0x0000000000800001, 0x4b000001, 0x4b000001);
12395  TestUScvtf32Helper(0x4000008000000000, 0x5e800001, 0x5e800001);
12396  // The largest int32_t that fits in a float.
12397  TestUScvtf32Helper(0x000000007fffff80, 0x4effffff, 0x4effffff);
12398  // Values that would be negative if treated as an int32_t.
12399  TestUScvtf32Helper(0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff);
12400  TestUScvtf32Helper(0x0000000080000000, 0x4f000000, 0x4f000000);
12401  TestUScvtf32Helper(0x0000000080000100, 0x4f000001, 0x4f000001);
12402  // The largest int64_t that fits in a float.
12403  TestUScvtf32Helper(0x7fffff8000000000, 0x5effffff, 0x5effffff);
12404  // Check for bit pattern reproduction.
12405  TestUScvtf32Helper(0x0000000000876543, 0x4b076543, 0x4b076543);
12406
12407  // Simple conversions of negative int64_t values. These require no rounding,
12408  // and the results should not depend on the rounding mode.
12409  TestUScvtf32Helper(0xfffffc0000000000, 0xd4800000, 0x5f7ffffc);
12410  TestUScvtf32Helper(0xc000000000000000, 0xde800000, 0x5f400000);
12411
12412  // Conversions which require rounding.
12413  TestUScvtf32Helper(0x0000800000000000, 0x57000000, 0x57000000);
12414  TestUScvtf32Helper(0x0000800000000001, 0x57000000, 0x57000000);
12415  TestUScvtf32Helper(0x0000800000800000, 0x57000000, 0x57000000);
12416  TestUScvtf32Helper(0x0000800000800001, 0x57000001, 0x57000001);
12417  TestUScvtf32Helper(0x0000800001000000, 0x57000001, 0x57000001);
12418  TestUScvtf32Helper(0x0000800001000001, 0x57000001, 0x57000001);
12419  TestUScvtf32Helper(0x0000800001800000, 0x57000002, 0x57000002);
12420  TestUScvtf32Helper(0x0000800001800001, 0x57000002, 0x57000002);
12421  TestUScvtf32Helper(0x0000800002000000, 0x57000002, 0x57000002);
12422  TestUScvtf32Helper(0x0000800002000001, 0x57000002, 0x57000002);
12423  TestUScvtf32Helper(0x0000800002800000, 0x57000002, 0x57000002);
12424  TestUScvtf32Helper(0x0000800002800001, 0x57000003, 0x57000003);
12425  TestUScvtf32Helper(0x0000800003000000, 0x57000003, 0x57000003);
12426  // Check rounding of negative int64_t values (and large uint64_t values).
12427  TestUScvtf32Helper(0x8000000000000000, 0xdf000000, 0x5f000000);
12428  TestUScvtf32Helper(0x8000000000000001, 0xdf000000, 0x5f000000);
12429  TestUScvtf32Helper(0x8000004000000000, 0xdf000000, 0x5f000000);
12430  TestUScvtf32Helper(0x8000004000000001, 0xdeffffff, 0x5f000000);
12431  TestUScvtf32Helper(0x8000008000000000, 0xdeffffff, 0x5f000000);
12432  TestUScvtf32Helper(0x8000008000000001, 0xdeffffff, 0x5f000001);
12433  TestUScvtf32Helper(0x800000c000000000, 0xdefffffe, 0x5f000001);
12434  TestUScvtf32Helper(0x800000c000000001, 0xdefffffe, 0x5f000001);
12435  TestUScvtf32Helper(0x8000010000000000, 0xdefffffe, 0x5f000001);
12436  TestUScvtf32Helper(0x8000010000000001, 0xdefffffe, 0x5f000001);
12437  TestUScvtf32Helper(0x8000014000000000, 0xdefffffe, 0x5f000001);
12438  TestUScvtf32Helper(0x8000014000000001, 0xdefffffd, 0x5f000001);
12439  TestUScvtf32Helper(0x8000018000000000, 0xdefffffd, 0x5f000002);
12440  // Round up to produce a result that's too big for the input to represent.
12441  TestUScvtf32Helper(0x000000007fffffc0, 0x4f000000, 0x4f000000);
12442  TestUScvtf32Helper(0x000000007fffffff, 0x4f000000, 0x4f000000);
12443  TestUScvtf32Helper(0x00000000ffffff80, 0x4f800000, 0x4f800000);
12444  TestUScvtf32Helper(0x00000000ffffffff, 0x4f800000, 0x4f800000);
12445  TestUScvtf32Helper(0x7fffffc000000000, 0x5f000000, 0x5f000000);
12446  TestUScvtf32Helper(0x7fffffffffffffff, 0x5f000000, 0x5f000000);
12447  TestUScvtf32Helper(0xffffff8000000000, 0xd3000000, 0x5f800000);
12448  TestUScvtf32Helper(0xffffffffffffffff, 0xbf800000, 0x5f800000);
12449}
12450
12451
12452TEST(system_mrs) {
12453  SETUP();
12454
12455  START();
12456  __ Mov(w0, 0);
12457  __ Mov(w1, 1);
12458  __ Mov(w2, 0x80000000);
12459
12460  // Set the Z and C flags.
12461  __ Cmp(w0, w0);
12462  __ Mrs(x3, NZCV);
12463
12464  // Set the N flag.
12465  __ Cmp(w0, w1);
12466  __ Mrs(x4, NZCV);
12467
12468  // Set the Z, C and V flags.
12469  __ Adds(w0, w2, w2);
12470  __ Mrs(x5, NZCV);
12471
12472  // Read the default FPCR.
12473  __ Mrs(x6, FPCR);
12474  END();
12475
12476  RUN();
12477
12478  // NZCV
12479  ASSERT_EQUAL_32(ZCFlag, w3);
12480  ASSERT_EQUAL_32(NFlag, w4);
12481  ASSERT_EQUAL_32(ZCVFlag, w5);
12482
12483  // FPCR
12484  // The default FPCR on Linux-based platforms is 0.
12485  ASSERT_EQUAL_32(0, w6);
12486
12487  TEARDOWN();
12488}
12489
12490
12491TEST(system_msr) {
12492  // All FPCR fields that must be implemented: AHP, DN, FZ, RMode
12493  const uint64_t fpcr_core = 0x07c00000;
12494
12495  // All FPCR fields (including fields which may be read-as-zero):
12496  //  Stride, Len
12497  //  IDE, IXE, UFE, OFE, DZE, IOE
12498  const uint64_t fpcr_all = fpcr_core | 0x00379f00;
12499
12500  SETUP();
12501
12502  START();
12503  __ Mov(w0, 0);
12504  __ Mov(w1, 0x7fffffff);
12505
12506  __ Mov(x7, 0);
12507
12508  __ Mov(x10, NVFlag);
12509  __ Cmp(w0, w0);     // Set Z and C.
12510  __ Msr(NZCV, x10);  // Set N and V.
12511  // The Msr should have overwritten every flag set by the Cmp.
12512  __ Cinc(x7, x7, mi);  // N
12513  __ Cinc(x7, x7, ne);  // !Z
12514  __ Cinc(x7, x7, lo);  // !C
12515  __ Cinc(x7, x7, vs);  // V
12516
12517  __ Mov(x10, ZCFlag);
12518  __ Cmn(w1, w1);     // Set N and V.
12519  __ Msr(NZCV, x10);  // Set Z and C.
12520  // The Msr should have overwritten every flag set by the Cmn.
12521  __ Cinc(x7, x7, pl);  // !N
12522  __ Cinc(x7, x7, eq);  // Z
12523  __ Cinc(x7, x7, hs);  // C
12524  __ Cinc(x7, x7, vc);  // !V
12525
12526  // All core FPCR fields must be writable.
12527  __ Mov(x8, fpcr_core);
12528  __ Msr(FPCR, x8);
12529  __ Mrs(x8, FPCR);
12530
12531  // All FPCR fields, including optional ones. This part of the test doesn't
12532  // achieve much other than ensuring that supported fields can be cleared by
12533  // the next test.
12534  __ Mov(x9, fpcr_all);
12535  __ Msr(FPCR, x9);
12536  __ Mrs(x9, FPCR);
12537  __ And(x9, x9, fpcr_core);
12538
12539  // The undefined bits must ignore writes.
12540  // It's conceivable that a future version of the architecture could use these
12541  // fields (making this test fail), but in the meantime this is a useful test
12542  // for the simulator.
12543  __ Mov(x10, ~fpcr_all);
12544  __ Msr(FPCR, x10);
12545  __ Mrs(x10, FPCR);
12546
12547  END();
12548
12549  RUN();
12550
12551  // We should have incremented x7 (from 0) exactly 8 times.
12552  ASSERT_EQUAL_64(8, x7);
12553
12554  ASSERT_EQUAL_64(fpcr_core, x8);
12555  ASSERT_EQUAL_64(fpcr_core, x9);
12556  ASSERT_EQUAL_64(0, x10);
12557
12558  TEARDOWN();
12559}
12560
12561
12562TEST(system_nop) {
12563  SETUP();
12564  RegisterDump before;
12565
12566  START();
12567  before.Dump(&masm);
12568  __ Nop();
12569  END();
12570
12571  RUN();
12572
12573  ASSERT_EQUAL_REGISTERS(before);
12574  ASSERT_EQUAL_NZCV(before.flags_nzcv());
12575
12576  TEARDOWN();
12577}
12578
12579
12580TEST(zero_dest) {
12581  SETUP();
12582  ALLOW_ASM();
12583  RegisterDump before;
12584
12585  START();
12586  // Preserve the stack pointer, in case we clobber it.
12587  __ Mov(x30, sp);
12588  // Initialize the other registers used in this test.
12589  uint64_t literal_base = 0x0100001000100101;
12590  __ Mov(x0, 0);
12591  __ Mov(x1, literal_base);
12592  for (unsigned i = 2; i < x30.code(); i++) {
12593    __ Add(Register::XRegFromCode(i), Register::XRegFromCode(i-1), x1);
12594  }
12595  before.Dump(&masm);
12596
12597  // All of these instructions should be NOPs in these forms, but have
12598  // alternate forms which can write into the stack pointer.
12599  __ add(xzr, x0, x1);
12600  __ add(xzr, x1, xzr);
12601  __ add(xzr, xzr, x1);
12602
12603  __ and_(xzr, x0, x2);
12604  __ and_(xzr, x2, xzr);
12605  __ and_(xzr, xzr, x2);
12606
12607  __ bic(xzr, x0, x3);
12608  __ bic(xzr, x3, xzr);
12609  __ bic(xzr, xzr, x3);
12610
12611  __ eon(xzr, x0, x4);
12612  __ eon(xzr, x4, xzr);
12613  __ eon(xzr, xzr, x4);
12614
12615  __ eor(xzr, x0, x5);
12616  __ eor(xzr, x5, xzr);
12617  __ eor(xzr, xzr, x5);
12618
12619  __ orr(xzr, x0, x6);
12620  __ orr(xzr, x6, xzr);
12621  __ orr(xzr, xzr, x6);
12622
12623  __ sub(xzr, x0, x7);
12624  __ sub(xzr, x7, xzr);
12625  __ sub(xzr, xzr, x7);
12626
12627  // Swap the saved stack pointer with the real one. If sp was written
12628  // during the test, it will show up in x30. This is done because the test
12629  // framework assumes that sp will be valid at the end of the test.
12630  __ Mov(x29, x30);
12631  __ Mov(x30, sp);
12632  __ Mov(sp, x29);
12633  // We used x29 as a scratch register, so reset it to make sure it doesn't
12634  // trigger a test failure.
12635  __ Add(x29, x28, x1);
12636  END();
12637
12638  RUN();
12639
12640  ASSERT_EQUAL_REGISTERS(before);
12641  ASSERT_EQUAL_NZCV(before.flags_nzcv());
12642
12643  TEARDOWN();
12644}
12645
12646
12647TEST(zero_dest_setflags) {
12648  SETUP();
12649  ALLOW_ASM();
12650  RegisterDump before;
12651
12652  START();
12653  // Preserve the stack pointer, in case we clobber it.
12654  __ Mov(x30, sp);
12655  // Initialize the other registers used in this test.
12656  uint64_t literal_base = 0x0100001000100101;
12657  __ Mov(x0, 0);
12658  __ Mov(x1, literal_base);
12659  for (int i = 2; i < 30; i++) {
12660    __ Add(Register::XRegFromCode(i), Register::XRegFromCode(i-1), x1);
12661  }
12662  before.Dump(&masm);
12663
12664  // All of these instructions should only write to the flags in these forms,
12665  // but have alternate forms which can write into the stack pointer.
12666  __ adds(xzr, x0, Operand(x1, UXTX));
12667  __ adds(xzr, x1, Operand(xzr, UXTX));
12668  __ adds(xzr, x1, 1234);
12669  __ adds(xzr, x0, x1);
12670  __ adds(xzr, x1, xzr);
12671  __ adds(xzr, xzr, x1);
12672
12673  __ ands(xzr, x2, ~0xf);
12674  __ ands(xzr, xzr, ~0xf);
12675  __ ands(xzr, x0, x2);
12676  __ ands(xzr, x2, xzr);
12677  __ ands(xzr, xzr, x2);
12678
12679  __ bics(xzr, x3, ~0xf);
12680  __ bics(xzr, xzr, ~0xf);
12681  __ bics(xzr, x0, x3);
12682  __ bics(xzr, x3, xzr);
12683  __ bics(xzr, xzr, x3);
12684
12685  __ subs(xzr, x0, Operand(x3, UXTX));
12686  __ subs(xzr, x3, Operand(xzr, UXTX));
12687  __ subs(xzr, x3, 1234);
12688  __ subs(xzr, x0, x3);
12689  __ subs(xzr, x3, xzr);
12690  __ subs(xzr, xzr, x3);
12691
12692  // Swap the saved stack pointer with the real one. If sp was written
12693  // during the test, it will show up in x30. This is done because the test
12694  // framework assumes that sp will be valid at the end of the test.
12695  __ Mov(x29, x30);
12696  __ Mov(x30, sp);
12697  __ Mov(sp, x29);
12698  // We used x29 as a scratch register, so reset it to make sure it doesn't
12699  // trigger a test failure.
12700  __ Add(x29, x28, x1);
12701  END();
12702
12703  RUN();
12704
12705  ASSERT_EQUAL_REGISTERS(before);
12706
12707  TEARDOWN();
12708}
12709
12710
12711TEST(register_bit) {
12712  // No code generation takes place in this test, so no need to setup and
12713  // teardown.
12714
12715  // Simple tests.
12716  assert(x0.Bit() == (UINT64_C(1) << 0));
12717  assert(x1.Bit() == (UINT64_C(1) << 1));
12718  assert(x10.Bit() == (UINT64_C(1) << 10));
12719
12720  // AAPCS64 definitions.
12721  assert(lr.Bit() == (UINT64_C(1) << kLinkRegCode));
12722
12723  // Fixed (hardware) definitions.
12724  assert(xzr.Bit() == (UINT64_C(1) << kZeroRegCode));
12725
12726  // Internal ABI definitions.
12727  assert(sp.Bit() == (UINT64_C(1) << kSPRegInternalCode));
12728  assert(sp.Bit() != xzr.Bit());
12729
12730  // xn.Bit() == wn.Bit() at all times, for the same n.
12731  assert(x0.Bit() == w0.Bit());
12732  assert(x1.Bit() == w1.Bit());
12733  assert(x10.Bit() == w10.Bit());
12734  assert(xzr.Bit() == wzr.Bit());
12735  assert(sp.Bit() == wsp.Bit());
12736}
12737
12738
12739TEST(stack_pointer_override) {
12740  // This test generates some stack maintenance code, but the test only checks
12741  // the reported state.
12742  SETUP();
12743  START();
12744
12745  // The default stack pointer in VIXL is sp.
12746  assert(sp.Is(__ StackPointer()));
12747  __ SetStackPointer(x0);
12748  assert(x0.Is(__ StackPointer()));
12749  __ SetStackPointer(x28);
12750  assert(x28.Is(__ StackPointer()));
12751  __ SetStackPointer(sp);
12752  assert(sp.Is(__ StackPointer()));
12753
12754  END();
12755  RUN();
12756  TEARDOWN();
12757}
12758
12759
12760TEST(peek_poke_simple) {
12761  SETUP();
12762  START();
12763
12764  static const RegList x0_to_x3 = x0.Bit() | x1.Bit() | x2.Bit() | x3.Bit();
12765  static const RegList x10_to_x13 = x10.Bit() | x11.Bit() |
12766                                    x12.Bit() | x13.Bit();
12767
12768  // The literal base is chosen to have two useful properties:
12769  //  * When multiplied by small values (such as a register index), this value
12770  //    is clearly readable in the result.
12771  //  * The value is not formed from repeating fixed-size smaller values, so it
12772  //    can be used to detect endianness-related errors.
12773  uint64_t literal_base = 0x0100001000100101;
12774
12775  // Initialize the registers.
12776  __ Mov(x0, literal_base);
12777  __ Add(x1, x0, x0);
12778  __ Add(x2, x1, x0);
12779  __ Add(x3, x2, x0);
12780
12781  __ Claim(32);
12782
12783  // Simple exchange.
12784  //  After this test:
12785  //    x0-x3 should be unchanged.
12786  //    w10-w13 should contain the lower words of x0-x3.
12787  __ Poke(x0, 0);
12788  __ Poke(x1, 8);
12789  __ Poke(x2, 16);
12790  __ Poke(x3, 24);
12791  Clobber(&masm, x0_to_x3);
12792  __ Peek(x0, 0);
12793  __ Peek(x1, 8);
12794  __ Peek(x2, 16);
12795  __ Peek(x3, 24);
12796
12797  __ Poke(w0, 0);
12798  __ Poke(w1, 4);
12799  __ Poke(w2, 8);
12800  __ Poke(w3, 12);
12801  Clobber(&masm, x10_to_x13);
12802  __ Peek(w10, 0);
12803  __ Peek(w11, 4);
12804  __ Peek(w12, 8);
12805  __ Peek(w13, 12);
12806
12807  __ Drop(32);
12808
12809  END();
12810  RUN();
12811
12812  ASSERT_EQUAL_64(literal_base * 1, x0);
12813  ASSERT_EQUAL_64(literal_base * 2, x1);
12814  ASSERT_EQUAL_64(literal_base * 3, x2);
12815  ASSERT_EQUAL_64(literal_base * 4, x3);
12816
12817  ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10);
12818  ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11);
12819  ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12);
12820  ASSERT_EQUAL_64((literal_base * 4) & 0xffffffff, x13);
12821
12822  TEARDOWN();
12823}
12824
12825
12826TEST(peek_poke_unaligned) {
12827  SETUP();
12828  START();
12829
12830  // The literal base is chosen to have two useful properties:
12831  //  * When multiplied by small values (such as a register index), this value
12832  //    is clearly readable in the result.
12833  //  * The value is not formed from repeating fixed-size smaller values, so it
12834  //    can be used to detect endianness-related errors.
12835  uint64_t literal_base = 0x0100001000100101;
12836
12837  // Initialize the registers.
12838  __ Mov(x0, literal_base);
12839  __ Add(x1, x0, x0);
12840  __ Add(x2, x1, x0);
12841  __ Add(x3, x2, x0);
12842  __ Add(x4, x3, x0);
12843  __ Add(x5, x4, x0);
12844  __ Add(x6, x5, x0);
12845
12846  __ Claim(32);
12847
12848  // Unaligned exchanges.
12849  //  After this test:
12850  //    x0-x6 should be unchanged.
12851  //    w10-w12 should contain the lower words of x0-x2.
12852  __ Poke(x0, 1);
12853  Clobber(&masm, x0.Bit());
12854  __ Peek(x0, 1);
12855  __ Poke(x1, 2);
12856  Clobber(&masm, x1.Bit());
12857  __ Peek(x1, 2);
12858  __ Poke(x2, 3);
12859  Clobber(&masm, x2.Bit());
12860  __ Peek(x2, 3);
12861  __ Poke(x3, 4);
12862  Clobber(&masm, x3.Bit());
12863  __ Peek(x3, 4);
12864  __ Poke(x4, 5);
12865  Clobber(&masm, x4.Bit());
12866  __ Peek(x4, 5);
12867  __ Poke(x5, 6);
12868  Clobber(&masm, x5.Bit());
12869  __ Peek(x5, 6);
12870  __ Poke(x6, 7);
12871  Clobber(&masm, x6.Bit());
12872  __ Peek(x6, 7);
12873
12874  __ Poke(w0, 1);
12875  Clobber(&masm, w10.Bit());
12876  __ Peek(w10, 1);
12877  __ Poke(w1, 2);
12878  Clobber(&masm, w11.Bit());
12879  __ Peek(w11, 2);
12880  __ Poke(w2, 3);
12881  Clobber(&masm, w12.Bit());
12882  __ Peek(w12, 3);
12883
12884  __ Drop(32);
12885
12886  END();
12887  RUN();
12888
12889  ASSERT_EQUAL_64(literal_base * 1, x0);
12890  ASSERT_EQUAL_64(literal_base * 2, x1);
12891  ASSERT_EQUAL_64(literal_base * 3, x2);
12892  ASSERT_EQUAL_64(literal_base * 4, x3);
12893  ASSERT_EQUAL_64(literal_base * 5, x4);
12894  ASSERT_EQUAL_64(literal_base * 6, x5);
12895  ASSERT_EQUAL_64(literal_base * 7, x6);
12896
12897  ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10);
12898  ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11);
12899  ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12);
12900
12901  TEARDOWN();
12902}
12903
12904
12905TEST(peek_poke_endianness) {
12906  SETUP();
12907  START();
12908
12909  // The literal base is chosen to have two useful properties:
12910  //  * When multiplied by small values (such as a register index), this value
12911  //    is clearly readable in the result.
12912  //  * The value is not formed from repeating fixed-size smaller values, so it
12913  //    can be used to detect endianness-related errors.
12914  uint64_t literal_base = 0x0100001000100101;
12915
12916  // Initialize the registers.
12917  __ Mov(x0, literal_base);
12918  __ Add(x1, x0, x0);
12919
12920  __ Claim(32);
12921
12922  // Endianness tests.
12923  //  After this section:
12924  //    x4 should match x0[31:0]:x0[63:32]
12925  //    w5 should match w1[15:0]:w1[31:16]
12926  __ Poke(x0, 0);
12927  __ Poke(x0, 8);
12928  __ Peek(x4, 4);
12929
12930  __ Poke(w1, 0);
12931  __ Poke(w1, 4);
12932  __ Peek(w5, 2);
12933
12934  __ Drop(32);
12935
12936  END();
12937  RUN();
12938
12939  uint64_t x0_expected = literal_base * 1;
12940  uint64_t x1_expected = literal_base * 2;
12941  uint64_t x4_expected = (x0_expected << 32) | (x0_expected >> 32);
12942  uint64_t x5_expected = ((x1_expected << 16) & 0xffff0000) |
12943                         ((x1_expected >> 16) & 0x0000ffff);
12944
12945  ASSERT_EQUAL_64(x0_expected, x0);
12946  ASSERT_EQUAL_64(x1_expected, x1);
12947  ASSERT_EQUAL_64(x4_expected, x4);
12948  ASSERT_EQUAL_64(x5_expected, x5);
12949
12950  TEARDOWN();
12951}
12952
12953
12954TEST(peek_poke_mixed) {
12955  SETUP();
12956  START();
12957
12958  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
12959  UseScratchRegisterScope temps(&masm);
12960  temps.ExcludeAll();
12961
12962  // The literal base is chosen to have two useful properties:
12963  //  * When multiplied by small values (such as a register index), this value
12964  //    is clearly readable in the result.
12965  //  * The value is not formed from repeating fixed-size smaller values, so it
12966  //    can be used to detect endianness-related errors.
12967  uint64_t literal_base = 0x0100001000100101;
12968
12969  // Initialize the registers.
12970  __ Mov(x0, literal_base);
12971  __ Add(x1, x0, x0);
12972  __ Add(x2, x1, x0);
12973  __ Add(x3, x2, x0);
12974
12975  __ Claim(32);
12976
12977  // Mix with other stack operations.
12978  //  After this section:
12979  //    x0-x3 should be unchanged.
12980  //    x6 should match x1[31:0]:x0[63:32]
12981  //    w7 should match x1[15:0]:x0[63:48]
12982  __ Poke(x1, 8);
12983  __ Poke(x0, 0);
12984  {
12985    VIXL_ASSERT(__ StackPointer().Is(sp));
12986    __ Mov(x4, __ StackPointer());
12987    __ SetStackPointer(x4);
12988
12989    __ Poke(wzr, 0);    // Clobber the space we're about to drop.
12990    __ Drop(4);
12991    __ Peek(x6, 0);
12992    __ Claim(8);
12993    __ Peek(w7, 10);
12994    __ Poke(x3, 28);
12995    __ Poke(xzr, 0);    // Clobber the space we're about to drop.
12996    __ Drop(8);
12997    __ Poke(x2, 12);
12998    __ Push(w0);
12999
13000    __ Mov(sp, __ StackPointer());
13001    __ SetStackPointer(sp);
13002  }
13003
13004  __ Pop(x0, x1, x2, x3);
13005
13006  END();
13007  RUN();
13008
13009  uint64_t x0_expected = literal_base * 1;
13010  uint64_t x1_expected = literal_base * 2;
13011  uint64_t x2_expected = literal_base * 3;
13012  uint64_t x3_expected = literal_base * 4;
13013  uint64_t x6_expected = (x1_expected << 32) | (x0_expected >> 32);
13014  uint64_t x7_expected = ((x1_expected << 16) & 0xffff0000) |
13015                         ((x0_expected >> 48) & 0x0000ffff);
13016
13017  ASSERT_EQUAL_64(x0_expected, x0);
13018  ASSERT_EQUAL_64(x1_expected, x1);
13019  ASSERT_EQUAL_64(x2_expected, x2);
13020  ASSERT_EQUAL_64(x3_expected, x3);
13021  ASSERT_EQUAL_64(x6_expected, x6);
13022  ASSERT_EQUAL_64(x7_expected, x7);
13023
13024  TEARDOWN();
13025}
13026
13027
13028TEST(peek_poke_reglist) {
13029  SETUP();
13030  START();
13031
13032  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
13033  UseScratchRegisterScope temps(&masm);
13034  temps.ExcludeAll();
13035
13036  // The literal base is chosen to have two useful properties:
13037  //  * When multiplied by small values (such as a register index), this value
13038  //    is clearly readable in the result.
13039  //  * The value is not formed from repeating fixed-size smaller values, so it
13040  //    can be used to detect endianness-related errors.
13041  uint64_t base = 0x0100001000100101;
13042
13043  // Initialize the registers.
13044  __ Mov(x1, base);
13045  __ Add(x2, x1, x1);
13046  __ Add(x3, x2, x1);
13047  __ Add(x4, x3, x1);
13048
13049  CPURegList list_1(x1, x2, x3, x4);
13050  CPURegList list_2(x11, x12, x13, x14);
13051  int list_1_size = list_1.TotalSizeInBytes();
13052
13053  __ Claim(2 * list_1_size);
13054
13055  __ PokeCPURegList(list_1, 0);
13056  __ PokeXRegList(list_1.list(), list_1_size);
13057  __ PeekCPURegList(list_2, 2 * kXRegSizeInBytes);
13058  __ PeekXRegList(x15.Bit(), kWRegSizeInBytes);
13059  __ PeekWRegList(w16.Bit() | w17.Bit(), 3 * kXRegSizeInBytes);
13060
13061  __ Drop(2 * list_1_size);
13062
13063
13064  uint64_t base_d = 0x1010010001000010;
13065
13066  // Initialize the registers.
13067  __ Mov(x1, base_d);
13068  __ Add(x2, x1, x1);
13069  __ Add(x3, x2, x1);
13070  __ Add(x4, x3, x1);
13071  __ Fmov(d1, x1);
13072  __ Fmov(d2, x2);
13073  __ Fmov(d3, x3);
13074  __ Fmov(d4, x4);
13075
13076  CPURegList list_d_1(d1, d2, d3, d4);
13077  CPURegList list_d_2(d11, d12, d13, d14);
13078  int list_d_1_size = list_d_1.TotalSizeInBytes();
13079
13080  __ Claim(2 * list_d_1_size);
13081
13082  __ PokeCPURegList(list_d_1, 0);
13083  __ PokeDRegList(list_d_1.list(), list_d_1_size);
13084  __ PeekCPURegList(list_d_2, 2 * kDRegSizeInBytes);
13085  __ PeekDRegList(d15.Bit(), kSRegSizeInBytes);
13086  __ PeekSRegList(s16.Bit() | s17.Bit(), 3 * kDRegSizeInBytes);
13087
13088  __ Drop(2 * list_d_1_size);
13089
13090
13091  END();
13092  RUN();
13093
13094  ASSERT_EQUAL_64(3 * base, x11);
13095  ASSERT_EQUAL_64(4 * base, x12);
13096  ASSERT_EQUAL_64(1 * base, x13);
13097  ASSERT_EQUAL_64(2 * base, x14);
13098  ASSERT_EQUAL_64(((1 * base) >> kWRegSize) | ((2 * base) << kWRegSize), x15);
13099  ASSERT_EQUAL_64(2 * base, x14);
13100  ASSERT_EQUAL_32((4 * base) & kWRegMask, w16);
13101  ASSERT_EQUAL_32((4 * base) >> kWRegSize, w17);
13102
13103  ASSERT_EQUAL_FP64(rawbits_to_double(3 * base_d), d11);
13104  ASSERT_EQUAL_FP64(rawbits_to_double(4 * base_d), d12);
13105  ASSERT_EQUAL_FP64(rawbits_to_double(1 * base_d), d13);
13106  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base_d), d14);
13107  ASSERT_EQUAL_FP64(
13108      rawbits_to_double((base_d >> kSRegSize) | ((2 * base_d) << kSRegSize)),
13109      d15);
13110  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base_d), d14);
13111  ASSERT_EQUAL_FP32(rawbits_to_float((4 * base_d) & kSRegMask), s16);
13112  ASSERT_EQUAL_FP32(rawbits_to_float((4 * base_d) >> kSRegSize), s17);
13113
13114  TEARDOWN();
13115}
13116
13117
13118TEST(load_store_reglist) {
13119  SETUP();
13120  START();
13121
13122  // The literal base is chosen to have two useful properties:
13123  //  * When multiplied by small values (such as a register index), this value
13124  //    is clearly readable in the result.
13125  //  * The value is not formed from repeating fixed-size smaller values, so it
13126  //    can be used to detect endianness-related errors.
13127  uint64_t high_base = UINT32_C(0x01000010);
13128  uint64_t low_base =  UINT32_C(0x00100101);
13129  uint64_t base = (high_base << 32) | low_base;
13130  uint64_t array[21];
13131  memset(array, 0, sizeof(array));
13132
13133  // Initialize the registers.
13134  __ Mov(x1, base);
13135  __ Add(x2, x1, x1);
13136  __ Add(x3, x2, x1);
13137  __ Add(x4, x3, x1);
13138  __ Fmov(d1, x1);
13139  __ Fmov(d2, x2);
13140  __ Fmov(d3, x3);
13141  __ Fmov(d4, x4);
13142  __ Fmov(d5, x1);
13143  __ Fmov(d6, x2);
13144  __ Fmov(d7, x3);
13145  __ Fmov(d8, x4);
13146
13147  Register reg_base = x20;
13148  Register reg_index = x21;
13149  int size_stored = 0;
13150
13151  __ Mov(reg_base, reinterpret_cast<uintptr_t>(&array));
13152
13153  // Test aligned accesses.
13154  CPURegList list_src(w1, w2, w3, w4);
13155  CPURegList list_dst(w11, w12, w13, w14);
13156  CPURegList list_fp_src_1(d1, d2, d3, d4);
13157  CPURegList list_fp_dst_1(d11, d12, d13, d14);
13158
13159  __ StoreCPURegList(list_src, MemOperand(reg_base, 0 * sizeof(uint64_t)));
13160  __ LoadCPURegList(list_dst, MemOperand(reg_base, 0 * sizeof(uint64_t)));
13161  size_stored += 4 * kWRegSizeInBytes;
13162
13163  __ Mov(reg_index, size_stored);
13164  __ StoreCPURegList(list_src, MemOperand(reg_base, reg_index));
13165  __ LoadCPURegList(list_dst, MemOperand(reg_base, reg_index));
13166  size_stored += 4 * kWRegSizeInBytes;
13167
13168  __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, size_stored));
13169  __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, size_stored));
13170  size_stored += 4 * kDRegSizeInBytes;
13171
13172  __ Mov(reg_index, size_stored);
13173  __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, reg_index));
13174  __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, reg_index));
13175  size_stored += 4 * kDRegSizeInBytes;
13176
13177  // Test unaligned accesses.
13178  CPURegList list_fp_src_2(d5, d6, d7, d8);
13179  CPURegList list_fp_dst_2(d15, d16, d17, d18);
13180
13181  __ Str(wzr, MemOperand(reg_base, size_stored));
13182  size_stored += 1 * kWRegSizeInBytes;
13183  __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, size_stored));
13184  __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, size_stored));
13185  size_stored += 4 * kDRegSizeInBytes;
13186
13187  __ Mov(reg_index, size_stored);
13188  __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, reg_index));
13189  __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, reg_index));
13190
13191  END();
13192  RUN();
13193
13194  VIXL_CHECK(array[0] == (1 * low_base) + (2 * low_base << kWRegSize));
13195  VIXL_CHECK(array[1] == (3 * low_base) + (4 * low_base << kWRegSize));
13196  VIXL_CHECK(array[2] == (1 * low_base) + (2 * low_base << kWRegSize));
13197  VIXL_CHECK(array[3] == (3 * low_base) + (4 * low_base << kWRegSize));
13198  VIXL_CHECK(array[4] == 1 * base);
13199  VIXL_CHECK(array[5] == 2 * base);
13200  VIXL_CHECK(array[6] == 3 * base);
13201  VIXL_CHECK(array[7] == 4 * base);
13202  VIXL_CHECK(array[8] == 1 * base);
13203  VIXL_CHECK(array[9] == 2 * base);
13204  VIXL_CHECK(array[10] == 3 * base);
13205  VIXL_CHECK(array[11] == 4 * base);
13206  VIXL_CHECK(array[12] == ((1 * low_base) << kSRegSize));
13207  VIXL_CHECK(array[13] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
13208  VIXL_CHECK(array[14] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
13209  VIXL_CHECK(array[15] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
13210  VIXL_CHECK(array[16] == (((1 * low_base) << kSRegSize) | (4 * high_base)));
13211  VIXL_CHECK(array[17] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
13212  VIXL_CHECK(array[18] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
13213  VIXL_CHECK(array[19] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
13214  VIXL_CHECK(array[20] == (4 * high_base));
13215
13216  ASSERT_EQUAL_64(1 * low_base, x11);
13217  ASSERT_EQUAL_64(2 * low_base, x12);
13218  ASSERT_EQUAL_64(3 * low_base, x13);
13219  ASSERT_EQUAL_64(4 * low_base, x14);
13220  ASSERT_EQUAL_FP64(rawbits_to_double(1 * base), d11);
13221  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base), d12);
13222  ASSERT_EQUAL_FP64(rawbits_to_double(3 * base), d13);
13223  ASSERT_EQUAL_FP64(rawbits_to_double(4 * base), d14);
13224  ASSERT_EQUAL_FP64(rawbits_to_double(1 * base), d15);
13225  ASSERT_EQUAL_FP64(rawbits_to_double(2 * base), d16);
13226  ASSERT_EQUAL_FP64(rawbits_to_double(3 * base), d17);
13227  ASSERT_EQUAL_FP64(rawbits_to_double(4 * base), d18);
13228
13229  TEARDOWN();
13230}
13231
13232
13233// This enum is used only as an argument to the push-pop test helpers.
13234enum PushPopMethod {
13235  // Push or Pop using the Push and Pop methods, with blocks of up to four
13236  // registers. (Smaller blocks will be used if necessary.)
13237  PushPopByFour,
13238
13239  // Use Push<Size>RegList and Pop<Size>RegList to transfer the registers.
13240  PushPopRegList
13241};
13242
13243
13244// The maximum number of registers that can be used by the PushPopXReg* tests,
13245// where a reg_count field is provided.
13246static int const kPushPopXRegMaxRegCount = -1;
13247
13248// Test a simple push-pop pattern:
13249//  * Claim <claim> bytes to set the stack alignment.
13250//  * Push <reg_count> registers with size <reg_size>.
13251//  * Clobber the register contents.
13252//  * Pop <reg_count> registers to restore the original contents.
13253//  * Drop <claim> bytes to restore the original stack pointer.
13254//
13255// Different push and pop methods can be specified independently to test for
13256// proper word-endian behaviour.
13257static void PushPopXRegSimpleHelper(int reg_count,
13258                                    int claim,
13259                                    int reg_size,
13260                                    PushPopMethod push_method,
13261                                    PushPopMethod pop_method) {
13262  SETUP();
13263
13264  START();
13265
13266  // Arbitrarily pick a register to use as a stack pointer.
13267  const Register& stack_pointer = x20;
13268  const RegList allowed = ~stack_pointer.Bit();
13269  if (reg_count == kPushPopXRegMaxRegCount) {
13270    reg_count = CountSetBits(allowed, kNumberOfRegisters);
13271  }
13272  // Work out which registers to use, based on reg_size.
13273  Register r[kNumberOfRegisters];
13274  Register x[kNumberOfRegisters];
13275  RegList list = PopulateRegisterArray(NULL, x, r, reg_size, reg_count,
13276                                       allowed);
13277
13278  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
13279  UseScratchRegisterScope temps(&masm);
13280  temps.ExcludeAll();
13281
13282  // The literal base is chosen to have two useful properties:
13283  //  * When multiplied by small values (such as a register index), this value
13284  //    is clearly readable in the result.
13285  //  * The value is not formed from repeating fixed-size smaller values, so it
13286  //    can be used to detect endianness-related errors.
13287  uint64_t literal_base = 0x0100001000100101;
13288
13289  {
13290    VIXL_ASSERT(__ StackPointer().Is(sp));
13291    __ Mov(stack_pointer, __ StackPointer());
13292    __ SetStackPointer(stack_pointer);
13293
13294    int i;
13295
13296    // Initialize the registers.
13297    for (i = 0; i < reg_count; i++) {
13298      // Always write into the X register, to ensure that the upper word is
13299      // properly ignored by Push when testing W registers.
13300      __ Mov(x[i], literal_base * i);
13301    }
13302
13303    // Claim memory first, as requested.
13304    __ Claim(claim);
13305
13306    switch (push_method) {
13307      case PushPopByFour:
13308        // Push high-numbered registers first (to the highest addresses).
13309        for (i = reg_count; i >= 4; i -= 4) {
13310          __ Push(r[i-1], r[i-2], r[i-3], r[i-4]);
13311        }
13312        // Finish off the leftovers.
13313        switch (i) {
13314          case 3:  __ Push(r[2], r[1], r[0]); break;
13315          case 2:  __ Push(r[1], r[0]);       break;
13316          case 1:  __ Push(r[0]);             break;
13317          default: VIXL_ASSERT(i == 0);            break;
13318        }
13319        break;
13320      case PushPopRegList:
13321        __ PushSizeRegList(list, reg_size);
13322        break;
13323    }
13324
13325    // Clobber all the registers, to ensure that they get repopulated by Pop.
13326    Clobber(&masm, list);
13327
13328    switch (pop_method) {
13329      case PushPopByFour:
13330        // Pop low-numbered registers first (from the lowest addresses).
13331        for (i = 0; i <= (reg_count-4); i += 4) {
13332          __ Pop(r[i], r[i+1], r[i+2], r[i+3]);
13333        }
13334        // Finish off the leftovers.
13335        switch (reg_count - i) {
13336          case 3:  __ Pop(r[i], r[i+1], r[i+2]); break;
13337          case 2:  __ Pop(r[i], r[i+1]);         break;
13338          case 1:  __ Pop(r[i]);                 break;
13339          default: VIXL_ASSERT(i == reg_count);       break;
13340        }
13341        break;
13342      case PushPopRegList:
13343        __ PopSizeRegList(list, reg_size);
13344        break;
13345    }
13346
13347    // Drop memory to restore stack_pointer.
13348    __ Drop(claim);
13349
13350    __ Mov(sp, __ StackPointer());
13351    __ SetStackPointer(sp);
13352  }
13353
13354  END();
13355
13356  RUN();
13357
13358  // Check that the register contents were preserved.
13359  // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test
13360  // that the upper word was properly cleared by Pop.
13361  literal_base &= (0xffffffffffffffff >> (64-reg_size));
13362  for (int i = 0; i < reg_count; i++) {
13363    if (x[i].Is(xzr)) {
13364      ASSERT_EQUAL_64(0, x[i]);
13365    } else {
13366      ASSERT_EQUAL_64(literal_base * i, x[i]);
13367    }
13368  }
13369
13370  TEARDOWN();
13371}
13372
13373
13374TEST(push_pop_xreg_simple_32) {
13375  for (int claim = 0; claim <= 8; claim++) {
13376    for (int count = 0; count <= 8; count++) {
13377      PushPopXRegSimpleHelper(count, claim, kWRegSize,
13378                              PushPopByFour, PushPopByFour);
13379      PushPopXRegSimpleHelper(count, claim, kWRegSize,
13380                              PushPopByFour, PushPopRegList);
13381      PushPopXRegSimpleHelper(count, claim, kWRegSize,
13382                              PushPopRegList, PushPopByFour);
13383      PushPopXRegSimpleHelper(count, claim, kWRegSize,
13384                              PushPopRegList, PushPopRegList);
13385    }
13386    // Test with the maximum number of registers.
13387    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13388                            claim, kWRegSize, PushPopByFour, PushPopByFour);
13389    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13390                            claim, kWRegSize, PushPopByFour, PushPopRegList);
13391    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13392                            claim, kWRegSize, PushPopRegList, PushPopByFour);
13393    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13394                            claim, kWRegSize, PushPopRegList, PushPopRegList);
13395  }
13396}
13397
13398
13399TEST(push_pop_xreg_simple_64) {
13400  for (int claim = 0; claim <= 8; claim++) {
13401    for (int count = 0; count <= 8; count++) {
13402      PushPopXRegSimpleHelper(count, claim, kXRegSize,
13403                              PushPopByFour, PushPopByFour);
13404      PushPopXRegSimpleHelper(count, claim, kXRegSize,
13405                              PushPopByFour, PushPopRegList);
13406      PushPopXRegSimpleHelper(count, claim, kXRegSize,
13407                              PushPopRegList, PushPopByFour);
13408      PushPopXRegSimpleHelper(count, claim, kXRegSize,
13409                              PushPopRegList, PushPopRegList);
13410    }
13411    // Test with the maximum number of registers.
13412    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13413                            claim, kXRegSize, PushPopByFour, PushPopByFour);
13414    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13415                            claim, kXRegSize, PushPopByFour, PushPopRegList);
13416    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13417                            claim, kXRegSize, PushPopRegList, PushPopByFour);
13418    PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount,
13419                            claim, kXRegSize, PushPopRegList, PushPopRegList);
13420  }
13421}
13422
13423
13424// The maximum number of registers that can be used by the PushPopFPXReg* tests,
13425// where a reg_count field is provided.
13426static int const kPushPopFPXRegMaxRegCount = -1;
13427
13428// Test a simple push-pop pattern:
13429//  * Claim <claim> bytes to set the stack alignment.
13430//  * Push <reg_count> FP registers with size <reg_size>.
13431//  * Clobber the register contents.
13432//  * Pop <reg_count> FP registers to restore the original contents.
13433//  * Drop <claim> bytes to restore the original stack pointer.
13434//
13435// Different push and pop methods can be specified independently to test for
13436// proper word-endian behaviour.
13437static void PushPopFPXRegSimpleHelper(int reg_count,
13438                                      int claim,
13439                                      int reg_size,
13440                                      PushPopMethod push_method,
13441                                      PushPopMethod pop_method) {
13442  SETUP();
13443
13444  START();
13445
13446  // We can use any floating-point register. None of them are reserved for
13447  // debug code, for example.
13448  static RegList const allowed = ~0;
13449  if (reg_count == kPushPopFPXRegMaxRegCount) {
13450    reg_count = CountSetBits(allowed, kNumberOfFPRegisters);
13451  }
13452  // Work out which registers to use, based on reg_size.
13453  FPRegister v[kNumberOfRegisters];
13454  FPRegister d[kNumberOfRegisters];
13455  RegList list = PopulateFPRegisterArray(NULL, d, v, reg_size, reg_count,
13456                                         allowed);
13457
13458  // Arbitrarily pick a register to use as a stack pointer.
13459  const Register& stack_pointer = x10;
13460
13461  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
13462  UseScratchRegisterScope temps(&masm);
13463  temps.ExcludeAll();
13464
13465  // The literal base is chosen to have two useful properties:
13466  //  * When multiplied (using an integer) by small values (such as a register
13467  //    index), this value is clearly readable in the result.
13468  //  * The value is not formed from repeating fixed-size smaller values, so it
13469  //    can be used to detect endianness-related errors.
13470  //  * It is never a floating-point NaN, and will therefore always compare
13471  //    equal to itself.
13472  uint64_t literal_base = 0x0100001000100101;
13473
13474  {
13475    VIXL_ASSERT(__ StackPointer().Is(sp));
13476    __ Mov(stack_pointer, __ StackPointer());
13477    __ SetStackPointer(stack_pointer);
13478
13479    int i;
13480
13481    // Initialize the registers, using X registers to load the literal.
13482    __ Mov(x0, 0);
13483    __ Mov(x1, literal_base);
13484    for (i = 0; i < reg_count; i++) {
13485      // Always write into the D register, to ensure that the upper word is
13486      // properly ignored by Push when testing S registers.
13487      __ Fmov(d[i], x0);
13488      // Calculate the next literal.
13489      __ Add(x0, x0, x1);
13490    }
13491
13492    // Claim memory first, as requested.
13493    __ Claim(claim);
13494
13495    switch (push_method) {
13496      case PushPopByFour:
13497        // Push high-numbered registers first (to the highest addresses).
13498        for (i = reg_count; i >= 4; i -= 4) {
13499          __ Push(v[i-1], v[i-2], v[i-3], v[i-4]);
13500        }
13501        // Finish off the leftovers.
13502        switch (i) {
13503          case 3:  __ Push(v[2], v[1], v[0]); break;
13504          case 2:  __ Push(v[1], v[0]);       break;
13505          case 1:  __ Push(v[0]);             break;
13506          default: VIXL_ASSERT(i == 0);            break;
13507        }
13508        break;
13509      case PushPopRegList:
13510        __ PushSizeRegList(list, reg_size, CPURegister::kVRegister);
13511        break;
13512    }
13513
13514    // Clobber all the registers, to ensure that they get repopulated by Pop.
13515    ClobberFP(&masm, list);
13516
13517    switch (pop_method) {
13518      case PushPopByFour:
13519        // Pop low-numbered registers first (from the lowest addresses).
13520        for (i = 0; i <= (reg_count-4); i += 4) {
13521          __ Pop(v[i], v[i+1], v[i+2], v[i+3]);
13522        }
13523        // Finish off the leftovers.
13524        switch (reg_count - i) {
13525          case 3:  __ Pop(v[i], v[i+1], v[i+2]); break;
13526          case 2:  __ Pop(v[i], v[i+1]);         break;
13527          case 1:  __ Pop(v[i]);                 break;
13528          default: VIXL_ASSERT(i == reg_count);       break;
13529        }
13530        break;
13531      case PushPopRegList:
13532        __ PopSizeRegList(list, reg_size, CPURegister::kVRegister);
13533        break;
13534    }
13535
13536    // Drop memory to restore the stack pointer.
13537    __ Drop(claim);
13538
13539    __ Mov(sp, __ StackPointer());
13540    __ SetStackPointer(sp);
13541  }
13542
13543  END();
13544
13545  RUN();
13546
13547  // Check that the register contents were preserved.
13548  // Always use ASSERT_EQUAL_FP64, even when testing S registers, so we can
13549  // test that the upper word was properly cleared by Pop.
13550  literal_base &= (0xffffffffffffffff >> (64-reg_size));
13551  for (int i = 0; i < reg_count; i++) {
13552    uint64_t literal = literal_base * i;
13553    double expected;
13554    memcpy(&expected, &literal, sizeof(expected));
13555    ASSERT_EQUAL_FP64(expected, d[i]);
13556  }
13557
13558  TEARDOWN();
13559}
13560
13561
13562TEST(push_pop_fp_xreg_simple_32) {
13563  for (int claim = 0; claim <= 8; claim++) {
13564    for (int count = 0; count <= 8; count++) {
13565      PushPopFPXRegSimpleHelper(count, claim, kSRegSize,
13566                                PushPopByFour, PushPopByFour);
13567      PushPopFPXRegSimpleHelper(count, claim, kSRegSize,
13568                                PushPopByFour, PushPopRegList);
13569      PushPopFPXRegSimpleHelper(count, claim, kSRegSize,
13570                                PushPopRegList, PushPopByFour);
13571      PushPopFPXRegSimpleHelper(count, claim, kSRegSize,
13572                                PushPopRegList, PushPopRegList);
13573    }
13574    // Test with the maximum number of registers.
13575    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize,
13576                              PushPopByFour, PushPopByFour);
13577    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize,
13578                              PushPopByFour, PushPopRegList);
13579    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize,
13580                              PushPopRegList, PushPopByFour);
13581    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize,
13582                              PushPopRegList, PushPopRegList);
13583  }
13584}
13585
13586
13587TEST(push_pop_fp_xreg_simple_64) {
13588  for (int claim = 0; claim <= 8; claim++) {
13589    for (int count = 0; count <= 8; count++) {
13590      PushPopFPXRegSimpleHelper(count, claim, kDRegSize,
13591                                PushPopByFour, PushPopByFour);
13592      PushPopFPXRegSimpleHelper(count, claim, kDRegSize,
13593                                PushPopByFour, PushPopRegList);
13594      PushPopFPXRegSimpleHelper(count, claim, kDRegSize,
13595                                PushPopRegList, PushPopByFour);
13596      PushPopFPXRegSimpleHelper(count, claim, kDRegSize,
13597                                PushPopRegList, PushPopRegList);
13598    }
13599    // Test with the maximum number of registers.
13600    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize,
13601                              PushPopByFour, PushPopByFour);
13602    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize,
13603                              PushPopByFour, PushPopRegList);
13604    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize,
13605                              PushPopRegList, PushPopByFour);
13606    PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize,
13607                              PushPopRegList, PushPopRegList);
13608  }
13609}
13610
13611
13612// Push and pop data using an overlapping combination of Push/Pop and
13613// RegList-based methods.
13614static void PushPopXRegMixedMethodsHelper(int claim, int reg_size) {
13615  SETUP();
13616
13617  // Arbitrarily pick a register to use as a stack pointer.
13618  const Register& stack_pointer = x5;
13619  const RegList allowed = ~stack_pointer.Bit();
13620  // Work out which registers to use, based on reg_size.
13621  Register r[10];
13622  Register x[10];
13623  PopulateRegisterArray(NULL, x, r, reg_size, 10, allowed);
13624
13625  // Calculate some handy register lists.
13626  RegList r0_to_r3 = 0;
13627  for (int i = 0; i <= 3; i++) {
13628    r0_to_r3 |= x[i].Bit();
13629  }
13630  RegList r4_to_r5 = 0;
13631  for (int i = 4; i <= 5; i++) {
13632    r4_to_r5 |= x[i].Bit();
13633  }
13634  RegList r6_to_r9 = 0;
13635  for (int i = 6; i <= 9; i++) {
13636    r6_to_r9 |= x[i].Bit();
13637  }
13638
13639  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
13640  UseScratchRegisterScope temps(&masm);
13641  temps.ExcludeAll();
13642
13643  // The literal base is chosen to have two useful properties:
13644  //  * When multiplied by small values (such as a register index), this value
13645  //    is clearly readable in the result.
13646  //  * The value is not formed from repeating fixed-size smaller values, so it
13647  //    can be used to detect endianness-related errors.
13648  uint64_t literal_base = 0x0100001000100101;
13649
13650  START();
13651  {
13652    VIXL_ASSERT(__ StackPointer().Is(sp));
13653    __ Mov(stack_pointer, __ StackPointer());
13654    __ SetStackPointer(stack_pointer);
13655
13656    // Claim memory first, as requested.
13657    __ Claim(claim);
13658
13659    __ Mov(x[3], literal_base * 3);
13660    __ Mov(x[2], literal_base * 2);
13661    __ Mov(x[1], literal_base * 1);
13662    __ Mov(x[0], literal_base * 0);
13663
13664    __ PushSizeRegList(r0_to_r3, reg_size);
13665    __ Push(r[3], r[2]);
13666
13667    Clobber(&masm, r0_to_r3);
13668    __ PopSizeRegList(r0_to_r3, reg_size);
13669
13670    __ Push(r[2], r[1], r[3], r[0]);
13671
13672    Clobber(&masm, r4_to_r5);
13673    __ Pop(r[4], r[5]);
13674    Clobber(&masm, r6_to_r9);
13675    __ Pop(r[6], r[7], r[8], r[9]);
13676
13677    // Drop memory to restore stack_pointer.
13678    __ Drop(claim);
13679
13680    __ Mov(sp, __ StackPointer());
13681    __ SetStackPointer(sp);
13682  }
13683
13684  END();
13685
13686  RUN();
13687
13688  // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test
13689  // that the upper word was properly cleared by Pop.
13690  literal_base &= (0xffffffffffffffff >> (64-reg_size));
13691
13692  ASSERT_EQUAL_64(literal_base * 3, x[9]);
13693  ASSERT_EQUAL_64(literal_base * 2, x[8]);
13694  ASSERT_EQUAL_64(literal_base * 0, x[7]);
13695  ASSERT_EQUAL_64(literal_base * 3, x[6]);
13696  ASSERT_EQUAL_64(literal_base * 1, x[5]);
13697  ASSERT_EQUAL_64(literal_base * 2, x[4]);
13698
13699  TEARDOWN();
13700}
13701
13702
13703TEST(push_pop_xreg_mixed_methods_64) {
13704  for (int claim = 0; claim <= 8; claim++) {
13705    PushPopXRegMixedMethodsHelper(claim, kXRegSize);
13706  }
13707}
13708
13709
13710TEST(push_pop_xreg_mixed_methods_32) {
13711  for (int claim = 0; claim <= 8; claim++) {
13712    PushPopXRegMixedMethodsHelper(claim, kWRegSize);
13713  }
13714}
13715
13716
13717// Push and pop data using overlapping X- and W-sized quantities.
13718static void PushPopXRegWXOverlapHelper(int reg_count, int claim) {
13719  SETUP();
13720
13721  // Arbitrarily pick a register to use as a stack pointer.
13722  const Register& stack_pointer = x10;
13723  const RegList allowed = ~stack_pointer.Bit();
13724  if (reg_count == kPushPopXRegMaxRegCount) {
13725    reg_count = CountSetBits(allowed, kNumberOfRegisters);
13726  }
13727  // Work out which registers to use, based on reg_size.
13728  Register w[kNumberOfRegisters];
13729  Register x[kNumberOfRegisters];
13730  RegList list = PopulateRegisterArray(w, x, NULL, 0, reg_count, allowed);
13731
13732  // The number of W-sized slots we expect to pop. When we pop, we alternate
13733  // between W and X registers, so we need reg_count*1.5 W-sized slots.
13734  int const requested_w_slots = reg_count + reg_count / 2;
13735
13736  // Track what _should_ be on the stack, using W-sized slots.
13737  static int const kMaxWSlots = kNumberOfRegisters + kNumberOfRegisters / 2;
13738  uint32_t stack[kMaxWSlots];
13739  for (int i = 0; i < kMaxWSlots; i++) {
13740    stack[i] = 0xdeadbeef;
13741  }
13742
13743  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
13744  UseScratchRegisterScope temps(&masm);
13745  temps.ExcludeAll();
13746
13747  // The literal base is chosen to have two useful properties:
13748  //  * When multiplied by small values (such as a register index), this value
13749  //    is clearly readable in the result.
13750  //  * The value is not formed from repeating fixed-size smaller values, so it
13751  //    can be used to detect endianness-related errors.
13752  static uint64_t const literal_base = 0x0100001000100101;
13753  static uint64_t const literal_base_hi = literal_base >> 32;
13754  static uint64_t const literal_base_lo = literal_base & 0xffffffff;
13755  static uint64_t const literal_base_w = literal_base & 0xffffffff;
13756
13757  START();
13758  {
13759    VIXL_ASSERT(__ StackPointer().Is(sp));
13760    __ Mov(stack_pointer, __ StackPointer());
13761    __ SetStackPointer(stack_pointer);
13762
13763    // Initialize the registers.
13764    for (int i = 0; i < reg_count; i++) {
13765      // Always write into the X register, to ensure that the upper word is
13766      // properly ignored by Push when testing W registers.
13767      __ Mov(x[i], literal_base * i);
13768    }
13769
13770    // Claim memory first, as requested.
13771    __ Claim(claim);
13772
13773    // The push-pop pattern is as follows:
13774    // Push:           Pop:
13775    //  x[0](hi)   ->   w[0]
13776    //  x[0](lo)   ->   x[1](hi)
13777    //  w[1]       ->   x[1](lo)
13778    //  w[1]       ->   w[2]
13779    //  x[2](hi)   ->   x[2](hi)
13780    //  x[2](lo)   ->   x[2](lo)
13781    //  x[2](hi)   ->   w[3]
13782    //  x[2](lo)   ->   x[4](hi)
13783    //  x[2](hi)   ->   x[4](lo)
13784    //  x[2](lo)   ->   w[5]
13785    //  w[3]       ->   x[5](hi)
13786    //  w[3]       ->   x[6](lo)
13787    //  w[3]       ->   w[7]
13788    //  w[3]       ->   x[8](hi)
13789    //  x[4](hi)   ->   x[8](lo)
13790    //  x[4](lo)   ->   w[9]
13791    // ... pattern continues ...
13792    //
13793    // That is, registers are pushed starting with the lower numbers,
13794    // alternating between x and w registers, and pushing i%4+1 copies of each,
13795    // where i is the register number.
13796    // Registers are popped starting with the higher numbers one-by-one,
13797    // alternating between x and w registers, but only popping one at a time.
13798    //
13799    // This pattern provides a wide variety of alignment effects and overlaps.
13800
13801    // ---- Push ----
13802
13803    int active_w_slots = 0;
13804    for (int i = 0; active_w_slots < requested_w_slots; i++) {
13805      VIXL_ASSERT(i < reg_count);
13806      // In order to test various arguments to PushMultipleTimes, and to try to
13807      // exercise different alignment and overlap effects, we push each
13808      // register a different number of times.
13809      int times = i % 4 + 1;
13810      if (i & 1) {
13811        // Push odd-numbered registers as W registers.
13812        __ PushMultipleTimes(times, w[i]);
13813        // Fill in the expected stack slots.
13814        for (int j = 0; j < times; j++) {
13815          if (w[i].Is(wzr)) {
13816            // The zero register always writes zeroes.
13817            stack[active_w_slots++] = 0;
13818          } else {
13819            stack[active_w_slots++] = literal_base_w * i;
13820          }
13821        }
13822      } else {
13823        // Push even-numbered registers as X registers.
13824        __ PushMultipleTimes(times, x[i]);
13825        // Fill in the expected stack slots.
13826        for (int j = 0; j < times; j++) {
13827          if (x[i].Is(xzr)) {
13828            // The zero register always writes zeroes.
13829            stack[active_w_slots++] = 0;
13830            stack[active_w_slots++] = 0;
13831          } else {
13832            stack[active_w_slots++] = literal_base_hi * i;
13833            stack[active_w_slots++] = literal_base_lo * i;
13834          }
13835        }
13836      }
13837    }
13838    // Because we were pushing several registers at a time, we probably pushed
13839    // more than we needed to.
13840    if (active_w_slots > requested_w_slots) {
13841      __ Drop((active_w_slots - requested_w_slots) * kWRegSizeInBytes);
13842      // Bump the number of active W-sized slots back to where it should be,
13843      // and fill the empty space with a dummy value.
13844      do {
13845        stack[active_w_slots--] = 0xdeadbeef;
13846      } while (active_w_slots > requested_w_slots);
13847    }
13848
13849    // ---- Pop ----
13850
13851    Clobber(&masm, list);
13852
13853    // If popping an even number of registers, the first one will be X-sized.
13854    // Otherwise, the first one will be W-sized.
13855    bool next_is_64 = !(reg_count & 1);
13856    for (int i = reg_count-1; i >= 0; i--) {
13857      if (next_is_64) {
13858        __ Pop(x[i]);
13859        active_w_slots -= 2;
13860      } else {
13861        __ Pop(w[i]);
13862        active_w_slots -= 1;
13863      }
13864      next_is_64 = !next_is_64;
13865    }
13866    VIXL_ASSERT(active_w_slots == 0);
13867
13868    // Drop memory to restore stack_pointer.
13869    __ Drop(claim);
13870
13871    __ Mov(sp, __ StackPointer());
13872    __ SetStackPointer(sp);
13873  }
13874
13875  END();
13876
13877  RUN();
13878
13879  int slot = 0;
13880  for (int i = 0; i < reg_count; i++) {
13881    // Even-numbered registers were written as W registers.
13882    // Odd-numbered registers were written as X registers.
13883    bool expect_64 = (i & 1);
13884    uint64_t expected;
13885
13886    if (expect_64) {
13887      uint64_t hi = stack[slot++];
13888      uint64_t lo = stack[slot++];
13889      expected = (hi << 32) | lo;
13890    } else {
13891      expected = stack[slot++];
13892    }
13893
13894    // Always use ASSERT_EQUAL_64, even when testing W registers, so we can
13895    // test that the upper word was properly cleared by Pop.
13896    if (x[i].Is(xzr)) {
13897      ASSERT_EQUAL_64(0, x[i]);
13898    } else {
13899      ASSERT_EQUAL_64(expected, x[i]);
13900    }
13901  }
13902  VIXL_ASSERT(slot == requested_w_slots);
13903
13904  TEARDOWN();
13905}
13906
13907
13908TEST(push_pop_xreg_wx_overlap) {
13909  for (int claim = 0; claim <= 8; claim++) {
13910    for (int count = 1; count <= 8; count++) {
13911      PushPopXRegWXOverlapHelper(count, claim);
13912    }
13913    // Test with the maximum number of registers.
13914    PushPopXRegWXOverlapHelper(kPushPopXRegMaxRegCount, claim);
13915  }
13916}
13917
13918
13919TEST(push_pop_sp) {
13920  SETUP();
13921
13922  START();
13923
13924  VIXL_ASSERT(sp.Is(__ StackPointer()));
13925
13926  // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
13927  UseScratchRegisterScope temps(&masm);
13928  temps.ExcludeAll();
13929
13930  __ Mov(x3, 0x3333333333333333);
13931  __ Mov(x2, 0x2222222222222222);
13932  __ Mov(x1, 0x1111111111111111);
13933  __ Mov(x0, 0x0000000000000000);
13934  __ Claim(2 * kXRegSizeInBytes);
13935  __ PushXRegList(x0.Bit() | x1.Bit() | x2.Bit() | x3.Bit());
13936  __ Push(x3, x2);
13937  __ PopXRegList(x0.Bit() | x1.Bit() | x2.Bit() | x3.Bit());
13938  __ Push(x2, x1, x3, x0);
13939  __ Pop(x4, x5);
13940  __ Pop(x6, x7, x8, x9);
13941
13942  __ Claim(2 * kXRegSizeInBytes);
13943  __ PushWRegList(w0.Bit() | w1.Bit() | w2.Bit() | w3.Bit());
13944  __ Push(w3, w1, w2, w0);
13945  __ PopWRegList(w10.Bit() | w11.Bit() | w12.Bit() | w13.Bit());
13946  __ Pop(w14, w15, w16, w17);
13947
13948  __ Claim(2 * kXRegSizeInBytes);
13949  __ Push(w2, w2, w1, w1);
13950  __ Push(x3, x3);
13951  __ Pop(w18, w19, w20, w21);
13952  __ Pop(x22, x23);
13953
13954  __ Claim(2 * kXRegSizeInBytes);
13955  __ PushXRegList(x1.Bit() | x22.Bit());
13956  __ PopXRegList(x24.Bit() | x26.Bit());
13957
13958  __ Claim(2 * kXRegSizeInBytes);
13959  __ PushWRegList(w1.Bit() | w2.Bit() | w4.Bit() | w22.Bit());
13960  __ PopWRegList(w25.Bit() | w27.Bit() | w28.Bit() | w29.Bit());
13961
13962  __ Claim(2 * kXRegSizeInBytes);
13963  __ PushXRegList(0);
13964  __ PopXRegList(0);
13965  __ PushXRegList(0xffffffff);
13966  __ PopXRegList(0xffffffff);
13967  __ Drop(12 * kXRegSizeInBytes);
13968  END();
13969
13970  RUN();
13971
13972  ASSERT_EQUAL_64(0x1111111111111111, x3);
13973  ASSERT_EQUAL_64(0x0000000000000000, x2);
13974  ASSERT_EQUAL_64(0x3333333333333333, x1);
13975  ASSERT_EQUAL_64(0x2222222222222222, x0);
13976  ASSERT_EQUAL_64(0x3333333333333333, x9);
13977  ASSERT_EQUAL_64(0x2222222222222222, x8);
13978  ASSERT_EQUAL_64(0x0000000000000000, x7);
13979  ASSERT_EQUAL_64(0x3333333333333333, x6);
13980  ASSERT_EQUAL_64(0x1111111111111111, x5);
13981  ASSERT_EQUAL_64(0x2222222222222222, x4);
13982
13983  ASSERT_EQUAL_32(0x11111111U, w13);
13984  ASSERT_EQUAL_32(0x33333333U, w12);
13985  ASSERT_EQUAL_32(0x00000000U, w11);
13986  ASSERT_EQUAL_32(0x22222222U, w10);
13987  ASSERT_EQUAL_32(0x11111111U, w17);
13988  ASSERT_EQUAL_32(0x00000000U, w16);
13989  ASSERT_EQUAL_32(0x33333333U, w15);
13990  ASSERT_EQUAL_32(0x22222222U, w14);
13991
13992  ASSERT_EQUAL_32(0x11111111U, w18);
13993  ASSERT_EQUAL_32(0x11111111U, w19);
13994  ASSERT_EQUAL_32(0x11111111U, w20);
13995  ASSERT_EQUAL_32(0x11111111U, w21);
13996  ASSERT_EQUAL_64(0x3333333333333333, x22);
13997  ASSERT_EQUAL_64(0x0000000000000000, x23);
13998
13999  ASSERT_EQUAL_64(0x3333333333333333, x24);
14000  ASSERT_EQUAL_64(0x3333333333333333, x26);
14001
14002  ASSERT_EQUAL_32(0x33333333U, w25);
14003  ASSERT_EQUAL_32(0x00000000U, w27);
14004  ASSERT_EQUAL_32(0x22222222U, w28);
14005  ASSERT_EQUAL_32(0x33333333U, w29);
14006  TEARDOWN();
14007}
14008
14009
14010TEST(noreg) {
14011  // This test doesn't generate any code, but it verifies some invariants
14012  // related to NoReg.
14013  VIXL_CHECK(NoReg.Is(NoFPReg));
14014  VIXL_CHECK(NoFPReg.Is(NoReg));
14015
14016  VIXL_CHECK(NoVReg.Is(NoReg));
14017  VIXL_CHECK(NoReg.Is(NoVReg));
14018
14019  VIXL_CHECK(NoReg.Is(NoCPUReg));
14020  VIXL_CHECK(NoCPUReg.Is(NoReg));
14021
14022  VIXL_CHECK(NoFPReg.Is(NoCPUReg));
14023  VIXL_CHECK(NoCPUReg.Is(NoFPReg));
14024
14025  VIXL_CHECK(NoVReg.Is(NoCPUReg));
14026  VIXL_CHECK(NoCPUReg.Is(NoVReg));
14027
14028  VIXL_CHECK(NoReg.IsNone());
14029  VIXL_CHECK(NoFPReg.IsNone());
14030  VIXL_CHECK(NoVReg.IsNone());
14031  VIXL_CHECK(NoCPUReg.IsNone());
14032}
14033
14034
14035TEST(isvalid) {
14036  // This test doesn't generate any code, but it verifies some invariants
14037  // related to IsValid().
14038  VIXL_CHECK(!NoReg.IsValid());
14039  VIXL_CHECK(!NoFPReg.IsValid());
14040  VIXL_CHECK(!NoVReg.IsValid());
14041  VIXL_CHECK(!NoCPUReg.IsValid());
14042
14043  VIXL_CHECK(x0.IsValid());
14044  VIXL_CHECK(w0.IsValid());
14045  VIXL_CHECK(x30.IsValid());
14046  VIXL_CHECK(w30.IsValid());
14047  VIXL_CHECK(xzr.IsValid());
14048  VIXL_CHECK(wzr.IsValid());
14049
14050  VIXL_CHECK(sp.IsValid());
14051  VIXL_CHECK(wsp.IsValid());
14052
14053  VIXL_CHECK(d0.IsValid());
14054  VIXL_CHECK(s0.IsValid());
14055  VIXL_CHECK(d31.IsValid());
14056  VIXL_CHECK(s31.IsValid());
14057
14058  VIXL_CHECK(x0.IsValidRegister());
14059  VIXL_CHECK(w0.IsValidRegister());
14060  VIXL_CHECK(xzr.IsValidRegister());
14061  VIXL_CHECK(wzr.IsValidRegister());
14062  VIXL_CHECK(sp.IsValidRegister());
14063  VIXL_CHECK(wsp.IsValidRegister());
14064  VIXL_CHECK(!x0.IsValidFPRegister());
14065  VIXL_CHECK(!w0.IsValidFPRegister());
14066  VIXL_CHECK(!xzr.IsValidFPRegister());
14067  VIXL_CHECK(!wzr.IsValidFPRegister());
14068  VIXL_CHECK(!sp.IsValidFPRegister());
14069  VIXL_CHECK(!wsp.IsValidFPRegister());
14070
14071  VIXL_CHECK(d0.IsValidFPRegister());
14072  VIXL_CHECK(s0.IsValidFPRegister());
14073  VIXL_CHECK(!d0.IsValidRegister());
14074  VIXL_CHECK(!s0.IsValidRegister());
14075
14076  // Test the same as before, but using CPURegister types. This shouldn't make
14077  // any difference.
14078  VIXL_CHECK(static_cast<CPURegister>(x0).IsValid());
14079  VIXL_CHECK(static_cast<CPURegister>(w0).IsValid());
14080  VIXL_CHECK(static_cast<CPURegister>(x30).IsValid());
14081  VIXL_CHECK(static_cast<CPURegister>(w30).IsValid());
14082  VIXL_CHECK(static_cast<CPURegister>(xzr).IsValid());
14083  VIXL_CHECK(static_cast<CPURegister>(wzr).IsValid());
14084
14085  VIXL_CHECK(static_cast<CPURegister>(sp).IsValid());
14086  VIXL_CHECK(static_cast<CPURegister>(wsp).IsValid());
14087
14088  VIXL_CHECK(static_cast<CPURegister>(d0).IsValid());
14089  VIXL_CHECK(static_cast<CPURegister>(s0).IsValid());
14090  VIXL_CHECK(static_cast<CPURegister>(d31).IsValid());
14091  VIXL_CHECK(static_cast<CPURegister>(s31).IsValid());
14092
14093  VIXL_CHECK(static_cast<CPURegister>(x0).IsValidRegister());
14094  VIXL_CHECK(static_cast<CPURegister>(w0).IsValidRegister());
14095  VIXL_CHECK(static_cast<CPURegister>(xzr).IsValidRegister());
14096  VIXL_CHECK(static_cast<CPURegister>(wzr).IsValidRegister());
14097  VIXL_CHECK(static_cast<CPURegister>(sp).IsValidRegister());
14098  VIXL_CHECK(static_cast<CPURegister>(wsp).IsValidRegister());
14099  VIXL_CHECK(!static_cast<CPURegister>(x0).IsValidFPRegister());
14100  VIXL_CHECK(!static_cast<CPURegister>(w0).IsValidFPRegister());
14101  VIXL_CHECK(!static_cast<CPURegister>(xzr).IsValidFPRegister());
14102  VIXL_CHECK(!static_cast<CPURegister>(wzr).IsValidFPRegister());
14103  VIXL_CHECK(!static_cast<CPURegister>(sp).IsValidFPRegister());
14104  VIXL_CHECK(!static_cast<CPURegister>(wsp).IsValidFPRegister());
14105
14106  VIXL_CHECK(static_cast<CPURegister>(d0).IsValidFPRegister());
14107  VIXL_CHECK(static_cast<CPURegister>(s0).IsValidFPRegister());
14108  VIXL_CHECK(!static_cast<CPURegister>(d0).IsValidRegister());
14109  VIXL_CHECK(!static_cast<CPURegister>(s0).IsValidRegister());
14110}
14111
14112
14113TEST(printf) {
14114  SETUP();
14115  START();
14116
14117  char const * test_plain_string = "Printf with no arguments.\n";
14118  char const * test_substring = "'This is a substring.'";
14119  RegisterDump before;
14120
14121  // Initialize x29 to the value of the stack pointer. We will use x29 as a
14122  // temporary stack pointer later, and initializing it in this way allows the
14123  // RegisterDump check to pass.
14124  __ Mov(x29, __ StackPointer());
14125
14126  // Test simple integer arguments.
14127  __ Mov(x0, 1234);
14128  __ Mov(x1, 0x1234);
14129
14130  // Test simple floating-point arguments.
14131  __ Fmov(d0, 1.234);
14132
14133  // Test pointer (string) arguments.
14134  __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring));
14135
14136  // Test the maximum number of arguments, and sign extension.
14137  __ Mov(w3, 0xffffffff);
14138  __ Mov(w4, 0xffffffff);
14139  __ Mov(x5, 0xffffffffffffffff);
14140  __ Mov(x6, 0xffffffffffffffff);
14141  __ Fmov(s1, 1.234);
14142  __ Fmov(s2, 2.345);
14143  __ Fmov(d3, 3.456);
14144  __ Fmov(d4, 4.567);
14145
14146  // Test printing callee-saved registers.
14147  __ Mov(x28, 0x123456789abcdef);
14148  __ Fmov(d10, 42.0);
14149
14150  // Test with three arguments.
14151  __ Mov(x10, 3);
14152  __ Mov(x11, 40);
14153  __ Mov(x12, 500);
14154
14155  // A single character.
14156  __ Mov(w13, 'x');
14157
14158  // Check that we don't clobber any registers.
14159  before.Dump(&masm);
14160
14161  __ Printf(test_plain_string);   // NOLINT(runtime/printf)
14162  __ Printf("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1);
14163  __ Printf("w5: %" PRId32 ", x5: %" PRId64"\n", w5, x5);
14164  __ Printf("d0: %f\n", d0);
14165  __ Printf("Test %%s: %s\n", x2);
14166  __ Printf("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 "\n"
14167            "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n",
14168            w3, w4, x5, x6);
14169  __ Printf("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4);
14170  __ Printf("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28);
14171  __ Printf("%g\n", d10);
14172  __ Printf("%%%%%s%%%c%%\n", x2, w13);
14173
14174  // Print the stack pointer (sp).
14175  __ Printf("StackPointer(sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n",
14176            __ StackPointer(), __ StackPointer().W());
14177
14178  // Test with a different stack pointer.
14179  const Register old_stack_pointer = __ StackPointer();
14180  __ Mov(x29, old_stack_pointer);
14181  __ SetStackPointer(x29);
14182  // Print the stack pointer (not sp).
14183  __ Printf("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n",
14184            __ StackPointer(), __ StackPointer().W());
14185  __ Mov(old_stack_pointer, __ StackPointer());
14186  __ SetStackPointer(old_stack_pointer);
14187
14188  // Test with three arguments.
14189  __ Printf("3=%u, 4=%u, 5=%u\n", x10, x11, x12);
14190
14191  // Mixed argument types.
14192  __ Printf("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n",
14193            w3, s1, x5, d3);
14194  __ Printf("s1: %f, d3: %f, w3: %" PRId32 ", x5: %" PRId64 "\n",
14195            s1, d3, w3, x5);
14196
14197  END();
14198  RUN();
14199
14200  // We cannot easily test the output of the Printf sequences, and because
14201  // Printf preserves all registers by default, we can't look at the number of
14202  // bytes that were printed. However, the printf_no_preserve test should check
14203  // that, and here we just test that we didn't clobber any registers.
14204  ASSERT_EQUAL_REGISTERS(before);
14205
14206  TEARDOWN();
14207}
14208
14209
14210TEST(printf_no_preserve) {
14211  SETUP();
14212  START();
14213
14214  char const * test_plain_string = "Printf with no arguments.\n";
14215  char const * test_substring = "'This is a substring.'";
14216
14217  __ PrintfNoPreserve(test_plain_string);
14218  __ Mov(x19, x0);
14219
14220  // Test simple integer arguments.
14221  __ Mov(x0, 1234);
14222  __ Mov(x1, 0x1234);
14223  __ PrintfNoPreserve("x0: %" PRId64", x1: 0x%08" PRIx64 "\n", x0, x1);
14224  __ Mov(x20, x0);
14225
14226  // Test simple floating-point arguments.
14227  __ Fmov(d0, 1.234);
14228  __ PrintfNoPreserve("d0: %f\n", d0);
14229  __ Mov(x21, x0);
14230
14231  // Test pointer (string) arguments.
14232  __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring));
14233  __ PrintfNoPreserve("Test %%s: %s\n", x2);
14234  __ Mov(x22, x0);
14235
14236  // Test the maximum number of arguments, and sign extension.
14237  __ Mov(w3, 0xffffffff);
14238  __ Mov(w4, 0xffffffff);
14239  __ Mov(x5, 0xffffffffffffffff);
14240  __ Mov(x6, 0xffffffffffffffff);
14241  __ PrintfNoPreserve("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 "\n"
14242                      "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n",
14243                      w3, w4, x5, x6);
14244  __ Mov(x23, x0);
14245
14246  __ Fmov(s1, 1.234);
14247  __ Fmov(s2, 2.345);
14248  __ Fmov(d3, 3.456);
14249  __ Fmov(d4, 4.567);
14250  __ PrintfNoPreserve("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4);
14251  __ Mov(x24, x0);
14252
14253  // Test printing callee-saved registers.
14254  __ Mov(x28, 0x123456789abcdef);
14255  __ PrintfNoPreserve("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28);
14256  __ Mov(x25, x0);
14257
14258  __ Fmov(d10, 42.0);
14259  __ PrintfNoPreserve("%g\n", d10);
14260  __ Mov(x26, x0);
14261
14262  // Test with a different stack pointer.
14263  const Register old_stack_pointer = __ StackPointer();
14264  __ Mov(x29, old_stack_pointer);
14265  __ SetStackPointer(x29);
14266  // Print the stack pointer (not sp).
14267  __ PrintfNoPreserve(
14268      "StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n",
14269      __ StackPointer(), __ StackPointer().W());
14270  __ Mov(x27, x0);
14271  __ Mov(old_stack_pointer, __ StackPointer());
14272  __ SetStackPointer(old_stack_pointer);
14273
14274  // Test with three arguments.
14275  __ Mov(x3, 3);
14276  __ Mov(x4, 40);
14277  __ Mov(x5, 500);
14278  __ PrintfNoPreserve("3=%u, 4=%u, 5=%u\n", x3, x4, x5);
14279  __ Mov(x28, x0);
14280
14281  // Mixed argument types.
14282  __ Mov(w3, 0xffffffff);
14283  __ Fmov(s1, 1.234);
14284  __ Mov(x5, 0xffffffffffffffff);
14285  __ Fmov(d3, 3.456);
14286  __ PrintfNoPreserve("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n",
14287                      w3, s1, x5, d3);
14288  __ Mov(x29, x0);
14289
14290  END();
14291  RUN();
14292
14293  // We cannot easily test the exact output of the Printf sequences, but we can
14294  // use the return code to check that the string length was correct.
14295
14296  // Printf with no arguments.
14297  ASSERT_EQUAL_64(strlen(test_plain_string), x19);
14298  // x0: 1234, x1: 0x00001234
14299  ASSERT_EQUAL_64(25, x20);
14300  // d0: 1.234000
14301  ASSERT_EQUAL_64(13, x21);
14302  // Test %s: 'This is a substring.'
14303  ASSERT_EQUAL_64(32, x22);
14304  // w3(uint32): 4294967295
14305  // w4(int32): -1
14306  // x5(uint64): 18446744073709551615
14307  // x6(int64): -1
14308  ASSERT_EQUAL_64(23 + 14 + 33 + 14, x23);
14309  // %f: 1.234000
14310  // %g: 2.345
14311  // %e: 3.456000e+00
14312  // %E: 4.567000E+00
14313  ASSERT_EQUAL_64(13 + 10 + 17 + 17, x24);
14314  // 0x89abcdef, 0x123456789abcdef
14315  ASSERT_EQUAL_64(30, x25);
14316  // 42
14317  ASSERT_EQUAL_64(3, x26);
14318  // StackPointer(not sp): 0x00007fb037ae2370, 0x37ae2370
14319  // Note: This is an example value, but the field width is fixed here so the
14320  // string length is still predictable.
14321  ASSERT_EQUAL_64(53, x27);
14322  // 3=3, 4=40, 5=500
14323  ASSERT_EQUAL_64(17, x28);
14324  // w3: 4294967295, s1: 1.234000, x5: 18446744073709551615, d3: 3.456000
14325  ASSERT_EQUAL_64(69, x29);
14326
14327  TEARDOWN();
14328}
14329
14330
14331#ifndef VIXL_INCLUDE_SIMULATOR
14332TEST(trace) {
14333  // The Trace helper should not generate any code unless the simulator (or
14334  // debugger) is being used.
14335  SETUP();
14336  START();
14337
14338  Label start;
14339  __ Bind(&start);
14340  __ Trace(LOG_ALL, TRACE_ENABLE);
14341  __ Trace(LOG_ALL, TRACE_DISABLE);
14342  VIXL_CHECK(__ SizeOfCodeGeneratedSince(&start) == 0);
14343
14344  END();
14345  TEARDOWN();
14346}
14347#endif
14348
14349
14350#ifndef VIXL_INCLUDE_SIMULATOR
14351TEST(log) {
14352  // The Log helper should not generate any code unless the simulator (or
14353  // debugger) is being used.
14354  SETUP();
14355  START();
14356
14357  Label start;
14358  __ Bind(&start);
14359  __ Log(LOG_ALL);
14360  VIXL_CHECK(__ SizeOfCodeGeneratedSince(&start) == 0);
14361
14362  END();
14363  TEARDOWN();
14364}
14365#endif
14366
14367
14368TEST(instruction_accurate_scope) {
14369  SETUP();
14370  START();
14371
14372  // By default macro instructions are allowed.
14373  VIXL_ASSERT(masm.AllowMacroInstructions());
14374  {
14375    InstructionAccurateScope scope1(&masm, 2);
14376    VIXL_ASSERT(!masm.AllowMacroInstructions());
14377    __ nop();
14378    {
14379      InstructionAccurateScope scope2(&masm, 1);
14380      VIXL_ASSERT(!masm.AllowMacroInstructions());
14381      __ nop();
14382    }
14383    VIXL_ASSERT(!masm.AllowMacroInstructions());
14384  }
14385  VIXL_ASSERT(masm.AllowMacroInstructions());
14386
14387  {
14388    InstructionAccurateScope scope(&masm, 2);
14389    __ add(x0, x0, x0);
14390    __ sub(x0, x0, x0);
14391  }
14392
14393  END();
14394  RUN();
14395  TEARDOWN();
14396}
14397
14398
14399TEST(blr_lr) {
14400  // A simple test to check that the simulator correcty handle "blr lr".
14401  SETUP();
14402
14403  START();
14404  Label target;
14405  Label end;
14406
14407  __ Mov(x0, 0x0);
14408  __ Adr(lr, &target);
14409
14410  __ Blr(lr);
14411  __ Mov(x0, 0xdeadbeef);
14412  __ B(&end);
14413
14414  __ Bind(&target);
14415  __ Mov(x0, 0xc001c0de);
14416
14417  __ Bind(&end);
14418  END();
14419
14420  RUN();
14421
14422  ASSERT_EQUAL_64(0xc001c0de, x0);
14423
14424  TEARDOWN();
14425}
14426
14427
14428TEST(barriers) {
14429  // Generate all supported barriers, this is just a smoke test
14430  SETUP();
14431
14432  START();
14433
14434  // DMB
14435  __ Dmb(FullSystem, BarrierAll);
14436  __ Dmb(FullSystem, BarrierReads);
14437  __ Dmb(FullSystem, BarrierWrites);
14438  __ Dmb(FullSystem, BarrierOther);
14439
14440  __ Dmb(InnerShareable, BarrierAll);
14441  __ Dmb(InnerShareable, BarrierReads);
14442  __ Dmb(InnerShareable, BarrierWrites);
14443  __ Dmb(InnerShareable, BarrierOther);
14444
14445  __ Dmb(NonShareable, BarrierAll);
14446  __ Dmb(NonShareable, BarrierReads);
14447  __ Dmb(NonShareable, BarrierWrites);
14448  __ Dmb(NonShareable, BarrierOther);
14449
14450  __ Dmb(OuterShareable, BarrierAll);
14451  __ Dmb(OuterShareable, BarrierReads);
14452  __ Dmb(OuterShareable, BarrierWrites);
14453  __ Dmb(OuterShareable, BarrierOther);
14454
14455  // DSB
14456  __ Dsb(FullSystem, BarrierAll);
14457  __ Dsb(FullSystem, BarrierReads);
14458  __ Dsb(FullSystem, BarrierWrites);
14459  __ Dsb(FullSystem, BarrierOther);
14460
14461  __ Dsb(InnerShareable, BarrierAll);
14462  __ Dsb(InnerShareable, BarrierReads);
14463  __ Dsb(InnerShareable, BarrierWrites);
14464  __ Dsb(InnerShareable, BarrierOther);
14465
14466  __ Dsb(NonShareable, BarrierAll);
14467  __ Dsb(NonShareable, BarrierReads);
14468  __ Dsb(NonShareable, BarrierWrites);
14469  __ Dsb(NonShareable, BarrierOther);
14470
14471  __ Dsb(OuterShareable, BarrierAll);
14472  __ Dsb(OuterShareable, BarrierReads);
14473  __ Dsb(OuterShareable, BarrierWrites);
14474  __ Dsb(OuterShareable, BarrierOther);
14475
14476  // ISB
14477  __ Isb();
14478
14479  END();
14480
14481  RUN();
14482
14483  TEARDOWN();
14484}
14485
14486
14487TEST(process_nan_double) {
14488  // Make sure that NaN propagation works correctly.
14489  double sn = rawbits_to_double(0x7ff5555511111111);
14490  double qn = rawbits_to_double(0x7ffaaaaa11111111);
14491  VIXL_ASSERT(IsSignallingNaN(sn));
14492  VIXL_ASSERT(IsQuietNaN(qn));
14493
14494  // The input NaNs after passing through ProcessNaN.
14495  double sn_proc = rawbits_to_double(0x7ffd555511111111);
14496  double qn_proc = qn;
14497  VIXL_ASSERT(IsQuietNaN(sn_proc));
14498  VIXL_ASSERT(IsQuietNaN(qn_proc));
14499
14500  SETUP();
14501  START();
14502
14503  // Execute a number of instructions which all use ProcessNaN, and check that
14504  // they all handle the NaN correctly.
14505  __ Fmov(d0, sn);
14506  __ Fmov(d10, qn);
14507
14508  // Operations that always propagate NaNs unchanged, even signalling NaNs.
14509  //   - Signalling NaN
14510  __ Fmov(d1, d0);
14511  __ Fabs(d2, d0);
14512  __ Fneg(d3, d0);
14513  //   - Quiet NaN
14514  __ Fmov(d11, d10);
14515  __ Fabs(d12, d10);
14516  __ Fneg(d13, d10);
14517
14518  // Operations that use ProcessNaN.
14519  //   - Signalling NaN
14520  __ Fsqrt(d4, d0);
14521  __ Frinta(d5, d0);
14522  __ Frintn(d6, d0);
14523  __ Frintz(d7, d0);
14524  //   - Quiet NaN
14525  __ Fsqrt(d14, d10);
14526  __ Frinta(d15, d10);
14527  __ Frintn(d16, d10);
14528  __ Frintz(d17, d10);
14529
14530  // The behaviour of fcvt is checked in TEST(fcvt_sd).
14531
14532  END();
14533  RUN();
14534
14535  uint64_t qn_raw = double_to_rawbits(qn);
14536  uint64_t sn_raw = double_to_rawbits(sn);
14537
14538  //   - Signalling NaN
14539  ASSERT_EQUAL_FP64(sn, d1);
14540  ASSERT_EQUAL_FP64(rawbits_to_double(sn_raw & ~kDSignMask), d2);
14541  ASSERT_EQUAL_FP64(rawbits_to_double(sn_raw ^ kDSignMask), d3);
14542  //   - Quiet NaN
14543  ASSERT_EQUAL_FP64(qn, d11);
14544  ASSERT_EQUAL_FP64(rawbits_to_double(qn_raw & ~kDSignMask), d12);
14545  ASSERT_EQUAL_FP64(rawbits_to_double(qn_raw ^ kDSignMask), d13);
14546
14547  //   - Signalling NaN
14548  ASSERT_EQUAL_FP64(sn_proc, d4);
14549  ASSERT_EQUAL_FP64(sn_proc, d5);
14550  ASSERT_EQUAL_FP64(sn_proc, d6);
14551  ASSERT_EQUAL_FP64(sn_proc, d7);
14552  //   - Quiet NaN
14553  ASSERT_EQUAL_FP64(qn_proc, d14);
14554  ASSERT_EQUAL_FP64(qn_proc, d15);
14555  ASSERT_EQUAL_FP64(qn_proc, d16);
14556  ASSERT_EQUAL_FP64(qn_proc, d17);
14557
14558  TEARDOWN();
14559}
14560
14561
14562TEST(process_nan_float) {
14563  // Make sure that NaN propagation works correctly.
14564  float sn = rawbits_to_float(0x7f951111);
14565  float qn = rawbits_to_float(0x7fea1111);
14566  VIXL_ASSERT(IsSignallingNaN(sn));
14567  VIXL_ASSERT(IsQuietNaN(qn));
14568
14569  // The input NaNs after passing through ProcessNaN.
14570  float sn_proc = rawbits_to_float(0x7fd51111);
14571  float qn_proc = qn;
14572  VIXL_ASSERT(IsQuietNaN(sn_proc));
14573  VIXL_ASSERT(IsQuietNaN(qn_proc));
14574
14575  SETUP();
14576  START();
14577
14578  // Execute a number of instructions which all use ProcessNaN, and check that
14579  // they all handle the NaN correctly.
14580  __ Fmov(s0, sn);
14581  __ Fmov(s10, qn);
14582
14583  // Operations that always propagate NaNs unchanged, even signalling NaNs.
14584  //   - Signalling NaN
14585  __ Fmov(s1, s0);
14586  __ Fabs(s2, s0);
14587  __ Fneg(s3, s0);
14588  //   - Quiet NaN
14589  __ Fmov(s11, s10);
14590  __ Fabs(s12, s10);
14591  __ Fneg(s13, s10);
14592
14593  // Operations that use ProcessNaN.
14594  //   - Signalling NaN
14595  __ Fsqrt(s4, s0);
14596  __ Frinta(s5, s0);
14597  __ Frintn(s6, s0);
14598  __ Frintz(s7, s0);
14599  //   - Quiet NaN
14600  __ Fsqrt(s14, s10);
14601  __ Frinta(s15, s10);
14602  __ Frintn(s16, s10);
14603  __ Frintz(s17, s10);
14604
14605  // The behaviour of fcvt is checked in TEST(fcvt_sd).
14606
14607  END();
14608  RUN();
14609
14610  uint32_t qn_raw = float_to_rawbits(qn);
14611  uint32_t sn_raw = float_to_rawbits(sn);
14612
14613  //   - Signalling NaN
14614  ASSERT_EQUAL_FP32(sn, s1);
14615  ASSERT_EQUAL_FP32(rawbits_to_float(sn_raw & ~kSSignMask), s2);
14616  ASSERT_EQUAL_FP32(rawbits_to_float(sn_raw ^ kSSignMask), s3);
14617  //   - Quiet NaN
14618  ASSERT_EQUAL_FP32(qn, s11);
14619  ASSERT_EQUAL_FP32(rawbits_to_float(qn_raw & ~kSSignMask), s12);
14620  ASSERT_EQUAL_FP32(rawbits_to_float(qn_raw ^ kSSignMask), s13);
14621
14622  //   - Signalling NaN
14623  ASSERT_EQUAL_FP32(sn_proc, s4);
14624  ASSERT_EQUAL_FP32(sn_proc, s5);
14625  ASSERT_EQUAL_FP32(sn_proc, s6);
14626  ASSERT_EQUAL_FP32(sn_proc, s7);
14627  //   - Quiet NaN
14628  ASSERT_EQUAL_FP32(qn_proc, s14);
14629  ASSERT_EQUAL_FP32(qn_proc, s15);
14630  ASSERT_EQUAL_FP32(qn_proc, s16);
14631  ASSERT_EQUAL_FP32(qn_proc, s17);
14632
14633  TEARDOWN();
14634}
14635
14636
14637static void ProcessNaNsHelper(double n, double m, double expected) {
14638  VIXL_ASSERT(std::isnan(n) || std::isnan(m));
14639  VIXL_ASSERT(std::isnan(expected));
14640
14641  SETUP();
14642  START();
14643
14644  // Execute a number of instructions which all use ProcessNaNs, and check that
14645  // they all propagate NaNs correctly.
14646  __ Fmov(d0, n);
14647  __ Fmov(d1, m);
14648
14649  __ Fadd(d2, d0, d1);
14650  __ Fsub(d3, d0, d1);
14651  __ Fmul(d4, d0, d1);
14652  __ Fdiv(d5, d0, d1);
14653  __ Fmax(d6, d0, d1);
14654  __ Fmin(d7, d0, d1);
14655
14656  END();
14657  RUN();
14658
14659  ASSERT_EQUAL_FP64(expected, d2);
14660  ASSERT_EQUAL_FP64(expected, d3);
14661  ASSERT_EQUAL_FP64(expected, d4);
14662  ASSERT_EQUAL_FP64(expected, d5);
14663  ASSERT_EQUAL_FP64(expected, d6);
14664  ASSERT_EQUAL_FP64(expected, d7);
14665
14666  TEARDOWN();
14667}
14668
14669
14670TEST(process_nans_double) {
14671  // Make sure that NaN propagation works correctly.
14672  double sn = rawbits_to_double(0x7ff5555511111111);
14673  double sm = rawbits_to_double(0x7ff5555522222222);
14674  double qn = rawbits_to_double(0x7ffaaaaa11111111);
14675  double qm = rawbits_to_double(0x7ffaaaaa22222222);
14676  VIXL_ASSERT(IsSignallingNaN(sn));
14677  VIXL_ASSERT(IsSignallingNaN(sm));
14678  VIXL_ASSERT(IsQuietNaN(qn));
14679  VIXL_ASSERT(IsQuietNaN(qm));
14680
14681  // The input NaNs after passing through ProcessNaN.
14682  double sn_proc = rawbits_to_double(0x7ffd555511111111);
14683  double sm_proc = rawbits_to_double(0x7ffd555522222222);
14684  double qn_proc = qn;
14685  double qm_proc = qm;
14686  VIXL_ASSERT(IsQuietNaN(sn_proc));
14687  VIXL_ASSERT(IsQuietNaN(sm_proc));
14688  VIXL_ASSERT(IsQuietNaN(qn_proc));
14689  VIXL_ASSERT(IsQuietNaN(qm_proc));
14690
14691  // Quiet NaNs are propagated.
14692  ProcessNaNsHelper(qn, 0, qn_proc);
14693  ProcessNaNsHelper(0, qm, qm_proc);
14694  ProcessNaNsHelper(qn, qm, qn_proc);
14695
14696  // Signalling NaNs are propagated, and made quiet.
14697  ProcessNaNsHelper(sn, 0, sn_proc);
14698  ProcessNaNsHelper(0, sm, sm_proc);
14699  ProcessNaNsHelper(sn, sm, sn_proc);
14700
14701  // Signalling NaNs take precedence over quiet NaNs.
14702  ProcessNaNsHelper(sn, qm, sn_proc);
14703  ProcessNaNsHelper(qn, sm, sm_proc);
14704  ProcessNaNsHelper(sn, sm, sn_proc);
14705}
14706
14707
14708static void ProcessNaNsHelper(float n, float m, float expected) {
14709  VIXL_ASSERT(std::isnan(n) || std::isnan(m));
14710  VIXL_ASSERT(std::isnan(expected));
14711
14712  SETUP();
14713  START();
14714
14715  // Execute a number of instructions which all use ProcessNaNs, and check that
14716  // they all propagate NaNs correctly.
14717  __ Fmov(s0, n);
14718  __ Fmov(s1, m);
14719
14720  __ Fadd(s2, s0, s1);
14721  __ Fsub(s3, s0, s1);
14722  __ Fmul(s4, s0, s1);
14723  __ Fdiv(s5, s0, s1);
14724  __ Fmax(s6, s0, s1);
14725  __ Fmin(s7, s0, s1);
14726
14727  END();
14728  RUN();
14729
14730  ASSERT_EQUAL_FP32(expected, s2);
14731  ASSERT_EQUAL_FP32(expected, s3);
14732  ASSERT_EQUAL_FP32(expected, s4);
14733  ASSERT_EQUAL_FP32(expected, s5);
14734  ASSERT_EQUAL_FP32(expected, s6);
14735  ASSERT_EQUAL_FP32(expected, s7);
14736
14737  TEARDOWN();
14738}
14739
14740
14741TEST(process_nans_float) {
14742  // Make sure that NaN propagation works correctly.
14743  float sn = rawbits_to_float(0x7f951111);
14744  float sm = rawbits_to_float(0x7f952222);
14745  float qn = rawbits_to_float(0x7fea1111);
14746  float qm = rawbits_to_float(0x7fea2222);
14747  VIXL_ASSERT(IsSignallingNaN(sn));
14748  VIXL_ASSERT(IsSignallingNaN(sm));
14749  VIXL_ASSERT(IsQuietNaN(qn));
14750  VIXL_ASSERT(IsQuietNaN(qm));
14751
14752  // The input NaNs after passing through ProcessNaN.
14753  float sn_proc = rawbits_to_float(0x7fd51111);
14754  float sm_proc = rawbits_to_float(0x7fd52222);
14755  float qn_proc = qn;
14756  float qm_proc = qm;
14757  VIXL_ASSERT(IsQuietNaN(sn_proc));
14758  VIXL_ASSERT(IsQuietNaN(sm_proc));
14759  VIXL_ASSERT(IsQuietNaN(qn_proc));
14760  VIXL_ASSERT(IsQuietNaN(qm_proc));
14761
14762  // Quiet NaNs are propagated.
14763  ProcessNaNsHelper(qn, 0, qn_proc);
14764  ProcessNaNsHelper(0, qm, qm_proc);
14765  ProcessNaNsHelper(qn, qm, qn_proc);
14766
14767  // Signalling NaNs are propagated, and made quiet.
14768  ProcessNaNsHelper(sn, 0, sn_proc);
14769  ProcessNaNsHelper(0, sm, sm_proc);
14770  ProcessNaNsHelper(sn, sm, sn_proc);
14771
14772  // Signalling NaNs take precedence over quiet NaNs.
14773  ProcessNaNsHelper(sn, qm, sn_proc);
14774  ProcessNaNsHelper(qn, sm, sm_proc);
14775  ProcessNaNsHelper(sn, sm, sn_proc);
14776}
14777
14778
14779static void DefaultNaNHelper(float n, float m, float a) {
14780  VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a));
14781
14782  bool test_1op = std::isnan(n);
14783  bool test_2op = std::isnan(n) || std::isnan(m);
14784
14785  SETUP();
14786  START();
14787
14788  // Enable Default-NaN mode in the FPCR.
14789  __ Mrs(x0, FPCR);
14790  __ Orr(x1, x0, DN_mask);
14791  __ Msr(FPCR, x1);
14792
14793  // Execute a number of instructions which all use ProcessNaNs, and check that
14794  // they all produce the default NaN.
14795  __ Fmov(s0, n);
14796  __ Fmov(s1, m);
14797  __ Fmov(s2, a);
14798
14799  if (test_1op) {
14800    // Operations that always propagate NaNs unchanged, even signalling NaNs.
14801    __ Fmov(s10, s0);
14802    __ Fabs(s11, s0);
14803    __ Fneg(s12, s0);
14804
14805    // Operations that use ProcessNaN.
14806    __ Fsqrt(s13, s0);
14807    __ Frinta(s14, s0);
14808    __ Frintn(s15, s0);
14809    __ Frintz(s16, s0);
14810
14811    // Fcvt usually has special NaN handling, but it respects default-NaN mode.
14812    __ Fcvt(d17, s0);
14813  }
14814
14815  if (test_2op) {
14816    __ Fadd(s18, s0, s1);
14817    __ Fsub(s19, s0, s1);
14818    __ Fmul(s20, s0, s1);
14819    __ Fdiv(s21, s0, s1);
14820    __ Fmax(s22, s0, s1);
14821    __ Fmin(s23, s0, s1);
14822  }
14823
14824  __ Fmadd(s24, s0, s1, s2);
14825  __ Fmsub(s25, s0, s1, s2);
14826  __ Fnmadd(s26, s0, s1, s2);
14827  __ Fnmsub(s27, s0, s1, s2);
14828
14829  // Restore FPCR.
14830  __ Msr(FPCR, x0);
14831
14832  END();
14833  RUN();
14834
14835  if (test_1op) {
14836    uint32_t n_raw = float_to_rawbits(n);
14837    ASSERT_EQUAL_FP32(n, s10);
14838    ASSERT_EQUAL_FP32(rawbits_to_float(n_raw & ~kSSignMask), s11);
14839    ASSERT_EQUAL_FP32(rawbits_to_float(n_raw ^ kSSignMask), s12);
14840    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s13);
14841    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s14);
14842    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s15);
14843    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s16);
14844    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d17);
14845  }
14846
14847  if (test_2op) {
14848    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s18);
14849    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s19);
14850    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s20);
14851    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s21);
14852    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s22);
14853    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s23);
14854  }
14855
14856  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s24);
14857  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s25);
14858  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s26);
14859  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s27);
14860
14861  TEARDOWN();
14862}
14863
14864
14865TEST(default_nan_float) {
14866  float sn = rawbits_to_float(0x7f951111);
14867  float sm = rawbits_to_float(0x7f952222);
14868  float sa = rawbits_to_float(0x7f95aaaa);
14869  float qn = rawbits_to_float(0x7fea1111);
14870  float qm = rawbits_to_float(0x7fea2222);
14871  float qa = rawbits_to_float(0x7feaaaaa);
14872  VIXL_ASSERT(IsSignallingNaN(sn));
14873  VIXL_ASSERT(IsSignallingNaN(sm));
14874  VIXL_ASSERT(IsSignallingNaN(sa));
14875  VIXL_ASSERT(IsQuietNaN(qn));
14876  VIXL_ASSERT(IsQuietNaN(qm));
14877  VIXL_ASSERT(IsQuietNaN(qa));
14878
14879  //   - Signalling NaNs
14880  DefaultNaNHelper(sn, 0.0f, 0.0f);
14881  DefaultNaNHelper(0.0f, sm, 0.0f);
14882  DefaultNaNHelper(0.0f, 0.0f, sa);
14883  DefaultNaNHelper(sn, sm, 0.0f);
14884  DefaultNaNHelper(0.0f, sm, sa);
14885  DefaultNaNHelper(sn, 0.0f, sa);
14886  DefaultNaNHelper(sn, sm, sa);
14887  //   - Quiet NaNs
14888  DefaultNaNHelper(qn, 0.0f, 0.0f);
14889  DefaultNaNHelper(0.0f, qm, 0.0f);
14890  DefaultNaNHelper(0.0f, 0.0f, qa);
14891  DefaultNaNHelper(qn, qm, 0.0f);
14892  DefaultNaNHelper(0.0f, qm, qa);
14893  DefaultNaNHelper(qn, 0.0f, qa);
14894  DefaultNaNHelper(qn, qm, qa);
14895  //   - Mixed NaNs
14896  DefaultNaNHelper(qn, sm, sa);
14897  DefaultNaNHelper(sn, qm, sa);
14898  DefaultNaNHelper(sn, sm, qa);
14899  DefaultNaNHelper(qn, qm, sa);
14900  DefaultNaNHelper(sn, qm, qa);
14901  DefaultNaNHelper(qn, sm, qa);
14902  DefaultNaNHelper(qn, qm, qa);
14903}
14904
14905
14906static void DefaultNaNHelper(double n, double m, double a) {
14907  VIXL_ASSERT(std::isnan(n) || std::isnan(m) || std::isnan(a));
14908
14909  bool test_1op = std::isnan(n);
14910  bool test_2op = std::isnan(n) || std::isnan(m);
14911
14912  SETUP();
14913  START();
14914
14915  // Enable Default-NaN mode in the FPCR.
14916  __ Mrs(x0, FPCR);
14917  __ Orr(x1, x0, DN_mask);
14918  __ Msr(FPCR, x1);
14919
14920  // Execute a number of instructions which all use ProcessNaNs, and check that
14921  // they all produce the default NaN.
14922  __ Fmov(d0, n);
14923  __ Fmov(d1, m);
14924  __ Fmov(d2, a);
14925
14926  if (test_1op) {
14927    // Operations that always propagate NaNs unchanged, even signalling NaNs.
14928    __ Fmov(d10, d0);
14929    __ Fabs(d11, d0);
14930    __ Fneg(d12, d0);
14931
14932    // Operations that use ProcessNaN.
14933    __ Fsqrt(d13, d0);
14934    __ Frinta(d14, d0);
14935    __ Frintn(d15, d0);
14936    __ Frintz(d16, d0);
14937
14938    // Fcvt usually has special NaN handling, but it respects default-NaN mode.
14939    __ Fcvt(s17, d0);
14940  }
14941
14942  if (test_2op) {
14943    __ Fadd(d18, d0, d1);
14944    __ Fsub(d19, d0, d1);
14945    __ Fmul(d20, d0, d1);
14946    __ Fdiv(d21, d0, d1);
14947    __ Fmax(d22, d0, d1);
14948    __ Fmin(d23, d0, d1);
14949  }
14950
14951  __ Fmadd(d24, d0, d1, d2);
14952  __ Fmsub(d25, d0, d1, d2);
14953  __ Fnmadd(d26, d0, d1, d2);
14954  __ Fnmsub(d27, d0, d1, d2);
14955
14956  // Restore FPCR.
14957  __ Msr(FPCR, x0);
14958
14959  END();
14960  RUN();
14961
14962  if (test_1op) {
14963    uint64_t n_raw = double_to_rawbits(n);
14964    ASSERT_EQUAL_FP64(n, d10);
14965    ASSERT_EQUAL_FP64(rawbits_to_double(n_raw & ~kDSignMask), d11);
14966    ASSERT_EQUAL_FP64(rawbits_to_double(n_raw ^ kDSignMask), d12);
14967    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
14968    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d14);
14969    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d15);
14970    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d16);
14971    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s17);
14972  }
14973
14974  if (test_2op) {
14975    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d18);
14976    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d19);
14977    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d20);
14978    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d21);
14979    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d22);
14980    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d23);
14981  }
14982
14983  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d24);
14984  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d25);
14985  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d26);
14986  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d27);
14987
14988  TEARDOWN();
14989}
14990
14991
14992TEST(default_nan_double) {
14993  double sn = rawbits_to_double(0x7ff5555511111111);
14994  double sm = rawbits_to_double(0x7ff5555522222222);
14995  double sa = rawbits_to_double(0x7ff55555aaaaaaaa);
14996  double qn = rawbits_to_double(0x7ffaaaaa11111111);
14997  double qm = rawbits_to_double(0x7ffaaaaa22222222);
14998  double qa = rawbits_to_double(0x7ffaaaaaaaaaaaaa);
14999  VIXL_ASSERT(IsSignallingNaN(sn));
15000  VIXL_ASSERT(IsSignallingNaN(sm));
15001  VIXL_ASSERT(IsSignallingNaN(sa));
15002  VIXL_ASSERT(IsQuietNaN(qn));
15003  VIXL_ASSERT(IsQuietNaN(qm));
15004  VIXL_ASSERT(IsQuietNaN(qa));
15005
15006  //   - Signalling NaNs
15007  DefaultNaNHelper(sn, 0.0, 0.0);
15008  DefaultNaNHelper(0.0, sm, 0.0);
15009  DefaultNaNHelper(0.0, 0.0, sa);
15010  DefaultNaNHelper(sn, sm, 0.0);
15011  DefaultNaNHelper(0.0, sm, sa);
15012  DefaultNaNHelper(sn, 0.0, sa);
15013  DefaultNaNHelper(sn, sm, sa);
15014  //   - Quiet NaNs
15015  DefaultNaNHelper(qn, 0.0, 0.0);
15016  DefaultNaNHelper(0.0, qm, 0.0);
15017  DefaultNaNHelper(0.0, 0.0, qa);
15018  DefaultNaNHelper(qn, qm, 0.0);
15019  DefaultNaNHelper(0.0, qm, qa);
15020  DefaultNaNHelper(qn, 0.0, qa);
15021  DefaultNaNHelper(qn, qm, qa);
15022  //   - Mixed NaNs
15023  DefaultNaNHelper(qn, sm, sa);
15024  DefaultNaNHelper(sn, qm, sa);
15025  DefaultNaNHelper(sn, sm, qa);
15026  DefaultNaNHelper(qn, qm, sa);
15027  DefaultNaNHelper(sn, qm, qa);
15028  DefaultNaNHelper(qn, sm, qa);
15029  DefaultNaNHelper(qn, qm, qa);
15030}
15031
15032
15033TEST(ldar_stlr) {
15034  // The middle value is read, modified, and written. The padding exists only to
15035  // check for over-write.
15036  uint8_t b[] = {0, 0x12, 0};
15037  uint16_t h[] = {0, 0x1234, 0};
15038  uint32_t w[] = {0, 0x12345678, 0};
15039  uint64_t x[] = {0, 0x123456789abcdef0, 0};
15040
15041  SETUP();
15042  START();
15043
15044  __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
15045  __ Ldarb(w0, MemOperand(x10));
15046  __ Add(w0, w0, 1);
15047  __ Stlrb(w0, MemOperand(x10));
15048
15049  __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
15050  __ Ldarh(w0, MemOperand(x10));
15051  __ Add(w0, w0, 1);
15052  __ Stlrh(w0, MemOperand(x10));
15053
15054  __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
15055  __ Ldar(w0, MemOperand(x10));
15056  __ Add(w0, w0, 1);
15057  __ Stlr(w0, MemOperand(x10));
15058
15059  __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
15060  __ Ldar(x0, MemOperand(x10));
15061  __ Add(x0, x0, 1);
15062  __ Stlr(x0, MemOperand(x10));
15063
15064  END();
15065  RUN();
15066
15067  ASSERT_EQUAL_32(0x13, b[1]);
15068  ASSERT_EQUAL_32(0x1235, h[1]);
15069  ASSERT_EQUAL_32(0x12345679, w[1]);
15070  ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
15071
15072  // Check for over-write.
15073  ASSERT_EQUAL_32(0, b[0]);
15074  ASSERT_EQUAL_32(0, b[2]);
15075  ASSERT_EQUAL_32(0, h[0]);
15076  ASSERT_EQUAL_32(0, h[2]);
15077  ASSERT_EQUAL_32(0, w[0]);
15078  ASSERT_EQUAL_32(0, w[2]);
15079  ASSERT_EQUAL_64(0, x[0]);
15080  ASSERT_EQUAL_64(0, x[2]);
15081
15082  TEARDOWN();
15083}
15084
15085
15086TEST(ldxr_stxr) {
15087  // The middle value is read, modified, and written. The padding exists only to
15088  // check for over-write.
15089  uint8_t b[] = {0, 0x12, 0};
15090  uint16_t h[] = {0, 0x1234, 0};
15091  uint32_t w[] = {0, 0x12345678, 0};
15092  uint64_t x[] = {0, 0x123456789abcdef0, 0};
15093
15094  // As above, but get suitably-aligned values for ldxp and stxp.
15095  uint32_t wp_data[] = {0, 0, 0, 0, 0};
15096  uint32_t * wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1;
15097  wp[1] = 0x12345678;           // wp[1] is 64-bit-aligned.
15098  wp[2] = 0x87654321;
15099  uint64_t xp_data[] = {0, 0, 0, 0, 0};
15100  uint64_t * xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1;
15101  xp[1] = 0x123456789abcdef0;   // xp[1] is 128-bit-aligned.
15102  xp[2] = 0x0fedcba987654321;
15103
15104  SETUP();
15105  START();
15106
15107  __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
15108  Label try_b;
15109  __ Bind(&try_b);
15110  __ Ldxrb(w0, MemOperand(x10));
15111  __ Add(w0, w0, 1);
15112  __ Stxrb(w5, w0, MemOperand(x10));
15113  __ Cbnz(w5, &try_b);
15114
15115  __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
15116  Label try_h;
15117  __ Bind(&try_h);
15118  __ Ldxrh(w0, MemOperand(x10));
15119  __ Add(w0, w0, 1);
15120  __ Stxrh(w5, w0, MemOperand(x10));
15121  __ Cbnz(w5, &try_h);
15122
15123  __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
15124  Label try_w;
15125  __ Bind(&try_w);
15126  __ Ldxr(w0, MemOperand(x10));
15127  __ Add(w0, w0, 1);
15128  __ Stxr(w5, w0, MemOperand(x10));
15129  __ Cbnz(w5, &try_w);
15130
15131  __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
15132  Label try_x;
15133  __ Bind(&try_x);
15134  __ Ldxr(x0, MemOperand(x10));
15135  __ Add(x0, x0, 1);
15136  __ Stxr(w5, x0, MemOperand(x10));
15137  __ Cbnz(w5, &try_x);
15138
15139  __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1]));
15140  Label try_wp;
15141  __ Bind(&try_wp);
15142  __ Ldxp(w0, w1, MemOperand(x10));
15143  __ Add(w0, w0, 1);
15144  __ Add(w1, w1, 1);
15145  __ Stxp(w5, w0, w1, MemOperand(x10));
15146  __ Cbnz(w5, &try_wp);
15147
15148  __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1]));
15149  Label try_xp;
15150  __ Bind(&try_xp);
15151  __ Ldxp(x0, x1, MemOperand(x10));
15152  __ Add(x0, x0, 1);
15153  __ Add(x1, x1, 1);
15154  __ Stxp(w5, x0, x1, MemOperand(x10));
15155  __ Cbnz(w5, &try_xp);
15156
15157  END();
15158  RUN();
15159
15160  ASSERT_EQUAL_32(0x13, b[1]);
15161  ASSERT_EQUAL_32(0x1235, h[1]);
15162  ASSERT_EQUAL_32(0x12345679, w[1]);
15163  ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
15164  ASSERT_EQUAL_32(0x12345679, wp[1]);
15165  ASSERT_EQUAL_32(0x87654322, wp[2]);
15166  ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]);
15167  ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]);
15168
15169  // Check for over-write.
15170  ASSERT_EQUAL_32(0, b[0]);
15171  ASSERT_EQUAL_32(0, b[2]);
15172  ASSERT_EQUAL_32(0, h[0]);
15173  ASSERT_EQUAL_32(0, h[2]);
15174  ASSERT_EQUAL_32(0, w[0]);
15175  ASSERT_EQUAL_32(0, w[2]);
15176  ASSERT_EQUAL_64(0, x[0]);
15177  ASSERT_EQUAL_64(0, x[2]);
15178  ASSERT_EQUAL_32(0, wp[0]);
15179  ASSERT_EQUAL_32(0, wp[3]);
15180  ASSERT_EQUAL_64(0, xp[0]);
15181  ASSERT_EQUAL_64(0, xp[3]);
15182
15183  TEARDOWN();
15184}
15185
15186
15187TEST(ldaxr_stlxr) {
15188  // The middle value is read, modified, and written. The padding exists only to
15189  // check for over-write.
15190  uint8_t b[] = {0, 0x12, 0};
15191  uint16_t h[] = {0, 0x1234, 0};
15192  uint32_t w[] = {0, 0x12345678, 0};
15193  uint64_t x[] = {0, 0x123456789abcdef0, 0};
15194
15195  // As above, but get suitably-aligned values for ldxp and stxp.
15196  uint32_t wp_data[] = {0, 0, 0, 0, 0};
15197  uint32_t * wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1;
15198  wp[1] = 0x12345678;           // wp[1] is 64-bit-aligned.
15199  wp[2] = 0x87654321;
15200  uint64_t xp_data[] = {0, 0, 0, 0, 0};
15201  uint64_t * xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1;
15202  xp[1] = 0x123456789abcdef0;   // xp[1] is 128-bit-aligned.
15203  xp[2] = 0x0fedcba987654321;
15204
15205  SETUP();
15206  START();
15207
15208  __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
15209  Label try_b;
15210  __ Bind(&try_b);
15211  __ Ldaxrb(w0, MemOperand(x10));
15212  __ Add(w0, w0, 1);
15213  __ Stlxrb(w5, w0, MemOperand(x10));
15214  __ Cbnz(w5, &try_b);
15215
15216  __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
15217  Label try_h;
15218  __ Bind(&try_h);
15219  __ Ldaxrh(w0, MemOperand(x10));
15220  __ Add(w0, w0, 1);
15221  __ Stlxrh(w5, w0, MemOperand(x10));
15222  __ Cbnz(w5, &try_h);
15223
15224  __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
15225  Label try_w;
15226  __ Bind(&try_w);
15227  __ Ldaxr(w0, MemOperand(x10));
15228  __ Add(w0, w0, 1);
15229  __ Stlxr(w5, w0, MemOperand(x10));
15230  __ Cbnz(w5, &try_w);
15231
15232  __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
15233  Label try_x;
15234  __ Bind(&try_x);
15235  __ Ldaxr(x0, MemOperand(x10));
15236  __ Add(x0, x0, 1);
15237  __ Stlxr(w5, x0, MemOperand(x10));
15238  __ Cbnz(w5, &try_x);
15239
15240  __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1]));
15241  Label try_wp;
15242  __ Bind(&try_wp);
15243  __ Ldaxp(w0, w1, MemOperand(x10));
15244  __ Add(w0, w0, 1);
15245  __ Add(w1, w1, 1);
15246  __ Stlxp(w5, w0, w1, MemOperand(x10));
15247  __ Cbnz(w5, &try_wp);
15248
15249  __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1]));
15250  Label try_xp;
15251  __ Bind(&try_xp);
15252  __ Ldaxp(x0, x1, MemOperand(x10));
15253  __ Add(x0, x0, 1);
15254  __ Add(x1, x1, 1);
15255  __ Stlxp(w5, x0, x1, MemOperand(x10));
15256  __ Cbnz(w5, &try_xp);
15257
15258  END();
15259  RUN();
15260
15261  ASSERT_EQUAL_32(0x13, b[1]);
15262  ASSERT_EQUAL_32(0x1235, h[1]);
15263  ASSERT_EQUAL_32(0x12345679, w[1]);
15264  ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
15265  ASSERT_EQUAL_32(0x12345679, wp[1]);
15266  ASSERT_EQUAL_32(0x87654322, wp[2]);
15267  ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]);
15268  ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]);
15269
15270  // Check for over-write.
15271  ASSERT_EQUAL_32(0, b[0]);
15272  ASSERT_EQUAL_32(0, b[2]);
15273  ASSERT_EQUAL_32(0, h[0]);
15274  ASSERT_EQUAL_32(0, h[2]);
15275  ASSERT_EQUAL_32(0, w[0]);
15276  ASSERT_EQUAL_32(0, w[2]);
15277  ASSERT_EQUAL_64(0, x[0]);
15278  ASSERT_EQUAL_64(0, x[2]);
15279  ASSERT_EQUAL_32(0, wp[0]);
15280  ASSERT_EQUAL_32(0, wp[3]);
15281  ASSERT_EQUAL_64(0, xp[0]);
15282  ASSERT_EQUAL_64(0, xp[3]);
15283
15284  TEARDOWN();
15285}
15286
15287
15288TEST(clrex) {
15289  // This data should never be written.
15290  uint64_t data[] = {0, 0, 0};
15291  uint64_t * data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
15292
15293  SETUP();
15294  START();
15295
15296  __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
15297  __ Mov(w6, 0);
15298
15299  __ Ldxrb(w0, MemOperand(x10));
15300  __ Clrex();
15301  __ Add(w0, w0, 1);
15302  __ Stxrb(w5, w0, MemOperand(x10));
15303  __ Add(w6, w6, w5);
15304
15305  __ Ldxrh(w0, MemOperand(x10));
15306  __ Clrex();
15307  __ Add(w0, w0, 1);
15308  __ Stxrh(w5, w0, MemOperand(x10));
15309  __ Add(w6, w6, w5);
15310
15311  __ Ldxr(w0, MemOperand(x10));
15312  __ Clrex();
15313  __ Add(w0, w0, 1);
15314  __ Stxr(w5, w0, MemOperand(x10));
15315  __ Add(w6, w6, w5);
15316
15317  __ Ldxr(x0, MemOperand(x10));
15318  __ Clrex();
15319  __ Add(x0, x0, 1);
15320  __ Stxr(w5, x0, MemOperand(x10));
15321  __ Add(w6, w6, w5);
15322
15323  __ Ldxp(w0, w1, MemOperand(x10));
15324  __ Clrex();
15325  __ Add(w0, w0, 1);
15326  __ Add(w1, w1, 1);
15327  __ Stxp(w5, w0, w1, MemOperand(x10));
15328  __ Add(w6, w6, w5);
15329
15330  __ Ldxp(x0, x1, MemOperand(x10));
15331  __ Clrex();
15332  __ Add(x0, x0, 1);
15333  __ Add(x1, x1, 1);
15334  __ Stxp(w5, x0, x1, MemOperand(x10));
15335  __ Add(w6, w6, w5);
15336
15337  // Acquire-release variants.
15338
15339  __ Ldaxrb(w0, MemOperand(x10));
15340  __ Clrex();
15341  __ Add(w0, w0, 1);
15342  __ Stlxrb(w5, w0, MemOperand(x10));
15343  __ Add(w6, w6, w5);
15344
15345  __ Ldaxrh(w0, MemOperand(x10));
15346  __ Clrex();
15347  __ Add(w0, w0, 1);
15348  __ Stlxrh(w5, w0, MemOperand(x10));
15349  __ Add(w6, w6, w5);
15350
15351  __ Ldaxr(w0, MemOperand(x10));
15352  __ Clrex();
15353  __ Add(w0, w0, 1);
15354  __ Stlxr(w5, w0, MemOperand(x10));
15355  __ Add(w6, w6, w5);
15356
15357  __ Ldaxr(x0, MemOperand(x10));
15358  __ Clrex();
15359  __ Add(x0, x0, 1);
15360  __ Stlxr(w5, x0, MemOperand(x10));
15361  __ Add(w6, w6, w5);
15362
15363  __ Ldaxp(w0, w1, MemOperand(x10));
15364  __ Clrex();
15365  __ Add(w0, w0, 1);
15366  __ Add(w1, w1, 1);
15367  __ Stlxp(w5, w0, w1, MemOperand(x10));
15368  __ Add(w6, w6, w5);
15369
15370  __ Ldaxp(x0, x1, MemOperand(x10));
15371  __ Clrex();
15372  __ Add(x0, x0, 1);
15373  __ Add(x1, x1, 1);
15374  __ Stlxp(w5, x0, x1, MemOperand(x10));
15375  __ Add(w6, w6, w5);
15376
15377  END();
15378  RUN();
15379
15380  // None of the 12 store-exclusives should have succeeded.
15381  ASSERT_EQUAL_32(12, w6);
15382
15383  ASSERT_EQUAL_64(0, data[0]);
15384  ASSERT_EQUAL_64(0, data[1]);
15385  ASSERT_EQUAL_64(0, data[2]);
15386
15387  TEARDOWN();
15388}
15389
15390
15391#ifdef VIXL_INCLUDE_SIMULATOR
15392// Check that the simulator occasionally makes store-exclusive fail.
15393TEST(ldxr_stxr_fail) {
15394  uint64_t data[] = {0, 0, 0};
15395  uint64_t * data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
15396
15397  // Impose a hard limit on the number of attempts, so the test cannot hang.
15398  static const uint64_t kWatchdog = 10000;
15399  Label done;
15400
15401  SETUP();
15402  START();
15403
15404  __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
15405  __ Mov(x11, kWatchdog);
15406
15407  // This loop is the opposite of what we normally do with ldxr and stxr; we
15408  // keep trying until we fail (or the watchdog counter runs out).
15409  Label try_b;
15410  __ Bind(&try_b);
15411  __ Ldxrb(w0, MemOperand(x10));
15412  __ Stxrb(w5, w0, MemOperand(x10));
15413  // Check the watchdog counter.
15414  __ Sub(x11, x11, 1);
15415  __ Cbz(x11, &done);
15416  // Check the exclusive-store result.
15417  __ Cbz(w5, &try_b);
15418
15419  Label try_h;
15420  __ Bind(&try_h);
15421  __ Ldxrh(w0, MemOperand(x10));
15422  __ Stxrh(w5, w0, MemOperand(x10));
15423  __ Sub(x11, x11, 1);
15424  __ Cbz(x11, &done);
15425  __ Cbz(w5, &try_h);
15426
15427  Label try_w;
15428  __ Bind(&try_w);
15429  __ Ldxr(w0, MemOperand(x10));
15430  __ Stxr(w5, w0, MemOperand(x10));
15431  __ Sub(x11, x11, 1);
15432  __ Cbz(x11, &done);
15433  __ Cbz(w5, &try_w);
15434
15435  Label try_x;
15436  __ Bind(&try_x);
15437  __ Ldxr(x0, MemOperand(x10));
15438  __ Stxr(w5, x0, MemOperand(x10));
15439  __ Sub(x11, x11, 1);
15440  __ Cbz(x11, &done);
15441  __ Cbz(w5, &try_x);
15442
15443  Label try_wp;
15444  __ Bind(&try_wp);
15445  __ Ldxp(w0, w1, MemOperand(x10));
15446  __ Stxp(w5, w0, w1, MemOperand(x10));
15447  __ Sub(x11, x11, 1);
15448  __ Cbz(x11, &done);
15449  __ Cbz(w5, &try_wp);
15450
15451  Label try_xp;
15452  __ Bind(&try_xp);
15453  __ Ldxp(x0, x1, MemOperand(x10));
15454  __ Stxp(w5, x0, x1, MemOperand(x10));
15455  __ Sub(x11, x11, 1);
15456  __ Cbz(x11, &done);
15457  __ Cbz(w5, &try_xp);
15458
15459  __ Bind(&done);
15460  // Trigger an error if x11 (watchdog) is zero.
15461  __ Cmp(x11, 0);
15462  __ Cset(x12, eq);
15463
15464  END();
15465  RUN();
15466
15467  // Check that the watchdog counter didn't run out.
15468  ASSERT_EQUAL_64(0, x12);
15469
15470  TEARDOWN();
15471}
15472#endif
15473
15474
15475#ifdef VIXL_INCLUDE_SIMULATOR
15476// Check that the simulator occasionally makes store-exclusive fail.
15477TEST(ldaxr_stlxr_fail) {
15478  uint64_t data[] = {0, 0, 0};
15479  uint64_t * data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
15480
15481  // Impose a hard limit on the number of attempts, so the test cannot hang.
15482  static const uint64_t kWatchdog = 10000;
15483  Label done;
15484
15485  SETUP();
15486  START();
15487
15488  __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
15489  __ Mov(x11, kWatchdog);
15490
15491  // This loop is the opposite of what we normally do with ldxr and stxr; we
15492  // keep trying until we fail (or the watchdog counter runs out).
15493  Label try_b;
15494  __ Bind(&try_b);
15495  __ Ldxrb(w0, MemOperand(x10));
15496  __ Stxrb(w5, w0, MemOperand(x10));
15497  // Check the watchdog counter.
15498  __ Sub(x11, x11, 1);
15499  __ Cbz(x11, &done);
15500  // Check the exclusive-store result.
15501  __ Cbz(w5, &try_b);
15502
15503  Label try_h;
15504  __ Bind(&try_h);
15505  __ Ldaxrh(w0, MemOperand(x10));
15506  __ Stlxrh(w5, w0, MemOperand(x10));
15507  __ Sub(x11, x11, 1);
15508  __ Cbz(x11, &done);
15509  __ Cbz(w5, &try_h);
15510
15511  Label try_w;
15512  __ Bind(&try_w);
15513  __ Ldaxr(w0, MemOperand(x10));
15514  __ Stlxr(w5, w0, MemOperand(x10));
15515  __ Sub(x11, x11, 1);
15516  __ Cbz(x11, &done);
15517  __ Cbz(w5, &try_w);
15518
15519  Label try_x;
15520  __ Bind(&try_x);
15521  __ Ldaxr(x0, MemOperand(x10));
15522  __ Stlxr(w5, x0, MemOperand(x10));
15523  __ Sub(x11, x11, 1);
15524  __ Cbz(x11, &done);
15525  __ Cbz(w5, &try_x);
15526
15527  Label try_wp;
15528  __ Bind(&try_wp);
15529  __ Ldaxp(w0, w1, MemOperand(x10));
15530  __ Stlxp(w5, w0, w1, MemOperand(x10));
15531  __ Sub(x11, x11, 1);
15532  __ Cbz(x11, &done);
15533  __ Cbz(w5, &try_wp);
15534
15535  Label try_xp;
15536  __ Bind(&try_xp);
15537  __ Ldaxp(x0, x1, MemOperand(x10));
15538  __ Stlxp(w5, x0, x1, MemOperand(x10));
15539  __ Sub(x11, x11, 1);
15540  __ Cbz(x11, &done);
15541  __ Cbz(w5, &try_xp);
15542
15543  __ Bind(&done);
15544  // Trigger an error if x11 (watchdog) is zero.
15545  __ Cmp(x11, 0);
15546  __ Cset(x12, eq);
15547
15548  END();
15549  RUN();
15550
15551  // Check that the watchdog counter didn't run out.
15552  ASSERT_EQUAL_64(0, x12);
15553
15554  TEARDOWN();
15555}
15556#endif
15557
15558
15559TEST(load_store_tagged_immediate_offset) {
15560  uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff };
15561  int tag_count = sizeof(tags) / sizeof(tags[0]);
15562
15563  const int kMaxDataLength = 160;
15564
15565  for (int i = 0; i < tag_count; i++) {
15566    unsigned char src[kMaxDataLength];
15567    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
15568    uint64_t src_tag = tags[i];
15569    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);
15570
15571    for (int k = 0; k < kMaxDataLength; k++) {
15572      src[k] = k + 1;
15573    }
15574
15575    for (int j = 0; j < tag_count; j++) {
15576      unsigned char dst[kMaxDataLength];
15577      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
15578      uint64_t dst_tag = tags[j];
15579      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);
15580
15581      memset(dst, 0, kMaxDataLength);
15582
15583      SETUP();
15584      ALLOW_ASM();
15585      START();
15586
15587      __ Mov(x0, src_tagged);
15588      __ Mov(x1, dst_tagged);
15589
15590      int offset = 0;
15591
15592      // Scaled-immediate offsets.
15593      __ ldp(q0, q1, MemOperand(x0, offset));
15594      __ stp(q0, q1, MemOperand(x1, offset));
15595      offset += 2 * kQRegSizeInBytes;
15596
15597      __ ldp(x2, x3, MemOperand(x0, offset));
15598      __ stp(x2, x3, MemOperand(x1, offset));
15599      offset += 2 * kXRegSizeInBytes;
15600
15601      __ ldpsw(x2, x3, MemOperand(x0, offset));
15602      __ stp(w2, w3, MemOperand(x1, offset));
15603      offset += 2 * kWRegSizeInBytes;
15604
15605      __ ldp(d0, d1, MemOperand(x0, offset));
15606      __ stp(d0, d1, MemOperand(x1, offset));
15607      offset += 2 * kDRegSizeInBytes;
15608
15609      __ ldp(w2, w3, MemOperand(x0, offset));
15610      __ stp(w2, w3, MemOperand(x1, offset));
15611      offset += 2 * kWRegSizeInBytes;
15612
15613      __ ldp(s0, s1, MemOperand(x0, offset));
15614      __ stp(s0, s1, MemOperand(x1, offset));
15615      offset += 2 * kSRegSizeInBytes;
15616
15617      __ ldr(x2, MemOperand(x0, offset), RequireScaledOffset);
15618      __ str(x2, MemOperand(x1, offset), RequireScaledOffset);
15619      offset += kXRegSizeInBytes;
15620
15621      __ ldr(d0, MemOperand(x0, offset), RequireScaledOffset);
15622      __ str(d0, MemOperand(x1, offset), RequireScaledOffset);
15623      offset += kDRegSizeInBytes;
15624
15625      __ ldr(w2, MemOperand(x0, offset), RequireScaledOffset);
15626      __ str(w2, MemOperand(x1, offset), RequireScaledOffset);
15627      offset += kWRegSizeInBytes;
15628
15629      __ ldr(s0, MemOperand(x0, offset), RequireScaledOffset);
15630      __ str(s0, MemOperand(x1, offset), RequireScaledOffset);
15631      offset += kSRegSizeInBytes;
15632
15633      __ ldrh(w2, MemOperand(x0, offset), RequireScaledOffset);
15634      __ strh(w2, MemOperand(x1, offset), RequireScaledOffset);
15635      offset += 2;
15636
15637      __ ldrsh(w2, MemOperand(x0, offset), RequireScaledOffset);
15638      __ strh(w2, MemOperand(x1, offset), RequireScaledOffset);
15639      offset += 2;
15640
15641      __ ldrb(w2, MemOperand(x0, offset), RequireScaledOffset);
15642      __ strb(w2, MemOperand(x1, offset), RequireScaledOffset);
15643      offset += 1;
15644
15645      __ ldrsb(w2, MemOperand(x0, offset), RequireScaledOffset);
15646      __ strb(w2, MemOperand(x1, offset), RequireScaledOffset);
15647      offset += 1;
15648
15649      // Unscaled-immediate offsets.
15650
15651      __ ldur(x2, MemOperand(x0, offset), RequireUnscaledOffset);
15652      __ stur(x2, MemOperand(x1, offset), RequireUnscaledOffset);
15653      offset += kXRegSizeInBytes;
15654
15655      __ ldur(d0, MemOperand(x0, offset), RequireUnscaledOffset);
15656      __ stur(d0, MemOperand(x1, offset), RequireUnscaledOffset);
15657      offset += kDRegSizeInBytes;
15658
15659      __ ldur(w2, MemOperand(x0, offset), RequireUnscaledOffset);
15660      __ stur(w2, MemOperand(x1, offset), RequireUnscaledOffset);
15661      offset += kWRegSizeInBytes;
15662
15663      __ ldur(s0, MemOperand(x0, offset), RequireUnscaledOffset);
15664      __ stur(s0, MemOperand(x1, offset), RequireUnscaledOffset);
15665      offset += kSRegSizeInBytes;
15666
15667      __ ldurh(w2, MemOperand(x0, offset), RequireUnscaledOffset);
15668      __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset);
15669      offset += 2;
15670
15671      __ ldursh(w2, MemOperand(x0, offset), RequireUnscaledOffset);
15672      __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset);
15673      offset += 2;
15674
15675      __ ldurb(w2, MemOperand(x0, offset), RequireUnscaledOffset);
15676      __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset);
15677      offset += 1;
15678
15679      __ ldursb(w2, MemOperand(x0, offset), RequireUnscaledOffset);
15680      __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset);
15681      offset += 1;
15682
15683      // Extract the tag (so we can test that it was preserved correctly).
15684      __ Ubfx(x0, x0, kAddressTagOffset, kAddressTagWidth);
15685      __ Ubfx(x1, x1, kAddressTagOffset, kAddressTagWidth);
15686
15687      VIXL_ASSERT(kMaxDataLength >= offset);
15688
15689      END();
15690      RUN();
15691
15692      ASSERT_EQUAL_64(src_tag, x0);
15693      ASSERT_EQUAL_64(dst_tag, x1);
15694
15695      for (int k = 0; k < offset; k++) {
15696        VIXL_CHECK(src[k] == dst[k]);
15697      }
15698
15699      TEARDOWN();
15700    }
15701  }
15702}
15703
15704
15705TEST(load_store_tagged_immediate_preindex) {
15706  uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff };
15707  int tag_count = sizeof(tags) / sizeof(tags[0]);
15708
15709  const int kMaxDataLength = 128;
15710
15711  for (int i = 0; i < tag_count; i++) {
15712    unsigned char src[kMaxDataLength];
15713    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
15714    uint64_t src_tag = tags[i];
15715    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);
15716
15717    for (int k = 0; k < kMaxDataLength; k++) {
15718      src[k] = k + 1;
15719    }
15720
15721    for (int j = 0; j < tag_count; j++) {
15722      unsigned char dst[kMaxDataLength];
15723      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
15724      uint64_t dst_tag = tags[j];
15725      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);
15726
15727      for (int k = 0; k < kMaxDataLength; k++) {
15728        dst[k] = 0;
15729      }
15730
15731      SETUP();
15732      ALLOW_ASM();
15733      START();
15734
15735      // Each MemOperand must apply a pre-index equal to the size of the
15736      // previous access.
15737
15738      // Start with a non-zero preindex.
15739      int preindex = 62 * kXRegSizeInBytes;
15740      int data_length = 0;
15741
15742      __ Mov(x0, src_tagged - preindex);
15743      __ Mov(x1, dst_tagged - preindex);
15744
15745      __ ldp(q0, q1, MemOperand(x0, preindex, PreIndex));
15746      __ stp(q0, q1, MemOperand(x1, preindex, PreIndex));
15747      preindex = 2 * kQRegSizeInBytes;
15748      data_length = preindex;
15749
15750      __ ldp(x2, x3, MemOperand(x0, preindex, PreIndex));
15751      __ stp(x2, x3, MemOperand(x1, preindex, PreIndex));
15752      preindex = 2 * kXRegSizeInBytes;
15753      data_length += preindex;
15754
15755      __ ldpsw(x2, x3, MemOperand(x0, preindex, PreIndex));
15756      __ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
15757      preindex = 2 * kWRegSizeInBytes;
15758      data_length += preindex;
15759
15760      __ ldp(d0, d1, MemOperand(x0, preindex, PreIndex));
15761      __ stp(d0, d1, MemOperand(x1, preindex, PreIndex));
15762      preindex = 2 * kDRegSizeInBytes;
15763      data_length += preindex;
15764
15765      __ ldp(w2, w3, MemOperand(x0, preindex, PreIndex));
15766      __ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
15767      preindex = 2 * kWRegSizeInBytes;
15768      data_length += preindex;
15769
15770      __ ldp(s0, s1, MemOperand(x0, preindex, PreIndex));
15771      __ stp(s0, s1, MemOperand(x1, preindex, PreIndex));
15772      preindex = 2 * kSRegSizeInBytes;
15773      data_length += preindex;
15774
15775      __ ldr(x2, MemOperand(x0, preindex, PreIndex));
15776      __ str(x2, MemOperand(x1, preindex, PreIndex));
15777      preindex = kXRegSizeInBytes;
15778      data_length += preindex;
15779
15780      __ ldr(d0, MemOperand(x0, preindex, PreIndex));
15781      __ str(d0, MemOperand(x1, preindex, PreIndex));
15782      preindex = kDRegSizeInBytes;
15783      data_length += preindex;
15784
15785      __ ldr(w2, MemOperand(x0, preindex, PreIndex));
15786      __ str(w2, MemOperand(x1, preindex, PreIndex));
15787      preindex = kWRegSizeInBytes;
15788      data_length += preindex;
15789
15790      __ ldr(s0, MemOperand(x0, preindex, PreIndex));
15791      __ str(s0, MemOperand(x1, preindex, PreIndex));
15792      preindex = kSRegSizeInBytes;
15793      data_length += preindex;
15794
15795      __ ldrh(w2, MemOperand(x0, preindex, PreIndex));
15796      __ strh(w2, MemOperand(x1, preindex, PreIndex));
15797      preindex = 2;
15798      data_length += preindex;
15799
15800      __ ldrsh(w2, MemOperand(x0, preindex, PreIndex));
15801      __ strh(w2, MemOperand(x1, preindex, PreIndex));
15802      preindex = 2;
15803      data_length += preindex;
15804
15805      __ ldrb(w2, MemOperand(x0, preindex, PreIndex));
15806      __ strb(w2, MemOperand(x1, preindex, PreIndex));
15807      preindex = 1;
15808      data_length += preindex;
15809
15810      __ ldrsb(w2, MemOperand(x0, preindex, PreIndex));
15811      __ strb(w2, MemOperand(x1, preindex, PreIndex));
15812      preindex = 1;
15813      data_length += preindex;
15814
15815      VIXL_ASSERT(kMaxDataLength >= data_length);
15816
15817      END();
15818      RUN();
15819
15820      // Check that the preindex was correctly applied in each operation, and
15821      // that the tag was preserved.
15822      ASSERT_EQUAL_64(src_tagged + data_length - preindex, x0);
15823      ASSERT_EQUAL_64(dst_tagged + data_length - preindex, x1);
15824
15825      for (int k = 0; k < data_length; k++) {
15826        VIXL_CHECK(src[k] == dst[k]);
15827      }
15828
15829      TEARDOWN();
15830    }
15831  }
15832}
15833
15834
15835TEST(load_store_tagged_immediate_postindex) {
15836  uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff };
15837  int tag_count = sizeof(tags) / sizeof(tags[0]);
15838
15839  const int kMaxDataLength = 128;
15840
15841  for (int i = 0; i < tag_count; i++) {
15842    unsigned char src[kMaxDataLength];
15843    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
15844    uint64_t src_tag = tags[i];
15845    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);
15846
15847    for (int k = 0; k < kMaxDataLength; k++) {
15848      src[k] = k + 1;
15849    }
15850
15851    for (int j = 0; j < tag_count; j++) {
15852      unsigned char dst[kMaxDataLength];
15853      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
15854      uint64_t dst_tag = tags[j];
15855      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);
15856
15857      for (int k = 0; k < kMaxDataLength; k++) {
15858        dst[k] = 0;
15859      }
15860
15861      SETUP();
15862      ALLOW_ASM();
15863      START();
15864
15865      int postindex = 2 * kXRegSizeInBytes;
15866      int data_length = 0;
15867
15868      __ Mov(x0, src_tagged);
15869      __ Mov(x1, dst_tagged);
15870
15871      __ ldp(x2, x3, MemOperand(x0, postindex, PostIndex));
15872      __ stp(x2, x3, MemOperand(x1, postindex, PostIndex));
15873      data_length = postindex;
15874
15875      postindex = 2 * kQRegSizeInBytes;
15876      __ ldp(q0, q1, MemOperand(x0, postindex, PostIndex));
15877      __ stp(q0, q1, MemOperand(x1, postindex, PostIndex));
15878      data_length += postindex;
15879
15880      postindex = 2 * kWRegSizeInBytes;
15881      __ ldpsw(x2, x3, MemOperand(x0, postindex, PostIndex));
15882      __ stp(w2, w3, MemOperand(x1, postindex, PostIndex));
15883      data_length += postindex;
15884
15885      postindex = 2 * kDRegSizeInBytes;
15886      __ ldp(d0, d1, MemOperand(x0, postindex, PostIndex));
15887      __ stp(d0, d1, MemOperand(x1, postindex, PostIndex));
15888      data_length += postindex;
15889
15890      postindex = 2 * kWRegSizeInBytes;
15891      __ ldp(w2, w3, MemOperand(x0, postindex, PostIndex));
15892      __ stp(w2, w3, MemOperand(x1, postindex, PostIndex));
15893      data_length += postindex;
15894
15895      postindex = 2 * kSRegSizeInBytes;
15896      __ ldp(s0, s1, MemOperand(x0, postindex, PostIndex));
15897      __ stp(s0, s1, MemOperand(x1, postindex, PostIndex));
15898      data_length += postindex;
15899
15900      postindex = kXRegSizeInBytes;
15901      __ ldr(x2, MemOperand(x0, postindex, PostIndex));
15902      __ str(x2, MemOperand(x1, postindex, PostIndex));
15903      data_length += postindex;
15904
15905      postindex = kDRegSizeInBytes;
15906      __ ldr(d0, MemOperand(x0, postindex, PostIndex));
15907      __ str(d0, MemOperand(x1, postindex, PostIndex));
15908      data_length += postindex;
15909
15910      postindex = kWRegSizeInBytes;
15911      __ ldr(w2, MemOperand(x0, postindex, PostIndex));
15912      __ str(w2, MemOperand(x1, postindex, PostIndex));
15913      data_length += postindex;
15914
15915      postindex = kSRegSizeInBytes;
15916      __ ldr(s0, MemOperand(x0, postindex, PostIndex));
15917      __ str(s0, MemOperand(x1, postindex, PostIndex));
15918      data_length += postindex;
15919
15920      postindex = 2;
15921      __ ldrh(w2, MemOperand(x0, postindex, PostIndex));
15922      __ strh(w2, MemOperand(x1, postindex, PostIndex));
15923      data_length += postindex;
15924
15925      postindex = 2;
15926      __ ldrsh(w2, MemOperand(x0, postindex, PostIndex));
15927      __ strh(w2, MemOperand(x1, postindex, PostIndex));
15928      data_length += postindex;
15929
15930      postindex = 1;
15931      __ ldrb(w2, MemOperand(x0, postindex, PostIndex));
15932      __ strb(w2, MemOperand(x1, postindex, PostIndex));
15933      data_length += postindex;
15934
15935      postindex = 1;
15936      __ ldrsb(w2, MemOperand(x0, postindex, PostIndex));
15937      __ strb(w2, MemOperand(x1, postindex, PostIndex));
15938      data_length += postindex;
15939
15940      VIXL_ASSERT(kMaxDataLength >= data_length);
15941
15942      END();
15943      RUN();
15944
15945      // Check that the postindex was correctly applied in each operation, and
15946      // that the tag was preserved.
15947      ASSERT_EQUAL_64(src_tagged + data_length, x0);
15948      ASSERT_EQUAL_64(dst_tagged + data_length, x1);
15949
15950      for (int k = 0; k < data_length; k++) {
15951        VIXL_CHECK(src[k] == dst[k]);
15952      }
15953
15954      TEARDOWN();
15955    }
15956  }
15957}
15958
15959
15960TEST(load_store_tagged_register_offset) {
15961  uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff };
15962  int tag_count = sizeof(tags) / sizeof(tags[0]);
15963
15964  const int kMaxDataLength = 128;
15965
15966  for (int i = 0; i < tag_count; i++) {
15967    unsigned char src[kMaxDataLength];
15968    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
15969    uint64_t src_tag = tags[i];
15970    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);
15971
15972    for (int k = 0; k < kMaxDataLength; k++) {
15973      src[k] = k + 1;
15974    }
15975
15976    for (int j = 0; j < tag_count; j++) {
15977      unsigned char dst[kMaxDataLength];
15978      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
15979      uint64_t dst_tag = tags[j];
15980      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);
15981
15982      // Also tag the offset register; the operation should still succeed.
15983      for (int o = 0; o < tag_count; o++) {
15984        uint64_t offset_base = CPU::SetPointerTag(UINT64_C(0), tags[o]);
15985        int data_length = 0;
15986
15987        for (int k = 0; k < kMaxDataLength; k++) {
15988          dst[k] = 0;
15989        }
15990
15991        SETUP();
15992        ALLOW_ASM();
15993        START();
15994
15995        __ Mov(x0, src_tagged);
15996        __ Mov(x1, dst_tagged);
15997
15998        __ Mov(x10, offset_base + data_length);
15999        __ ldr(x2, MemOperand(x0, x10));
16000        __ str(x2, MemOperand(x1, x10));
16001        data_length += kXRegSizeInBytes;
16002
16003        __ Mov(x10, offset_base + data_length);
16004        __ ldr(d0, MemOperand(x0, x10));
16005        __ str(d0, MemOperand(x1, x10));
16006        data_length += kDRegSizeInBytes;
16007
16008        __ Mov(x10, offset_base + data_length);
16009        __ ldr(w2, MemOperand(x0, x10));
16010        __ str(w2, MemOperand(x1, x10));
16011        data_length += kWRegSizeInBytes;
16012
16013        __ Mov(x10, offset_base + data_length);
16014        __ ldr(s0, MemOperand(x0, x10));
16015        __ str(s0, MemOperand(x1, x10));
16016        data_length += kSRegSizeInBytes;
16017
16018        __ Mov(x10, offset_base + data_length);
16019        __ ldrh(w2, MemOperand(x0, x10));
16020        __ strh(w2, MemOperand(x1, x10));
16021        data_length += 2;
16022
16023        __ Mov(x10, offset_base + data_length);
16024        __ ldrsh(w2, MemOperand(x0, x10));
16025        __ strh(w2, MemOperand(x1, x10));
16026        data_length += 2;
16027
16028        __ Mov(x10, offset_base + data_length);
16029        __ ldrb(w2, MemOperand(x0, x10));
16030        __ strb(w2, MemOperand(x1, x10));
16031        data_length += 1;
16032
16033        __ Mov(x10, offset_base + data_length);
16034        __ ldrsb(w2, MemOperand(x0, x10));
16035        __ strb(w2, MemOperand(x1, x10));
16036        data_length += 1;
16037
16038        VIXL_ASSERT(kMaxDataLength >= data_length);
16039
16040        END();
16041        RUN();
16042
16043        // Check that the postindex was correctly applied in each operation, and
16044        // that the tag was preserved.
16045        ASSERT_EQUAL_64(src_tagged, x0);
16046        ASSERT_EQUAL_64(dst_tagged, x1);
16047        ASSERT_EQUAL_64(offset_base + data_length - 1, x10);
16048
16049        for (int k = 0; k < data_length; k++) {
16050          VIXL_CHECK(src[k] == dst[k]);
16051        }
16052
16053        TEARDOWN();
16054      }
16055    }
16056  }
16057}
16058
16059
16060TEST(load_store_tagged_register_postindex) {
16061  uint64_t src[] = { 0x0706050403020100, 0x0f0e0d0c0b0a0908 };
16062  uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff };
16063  int tag_count = sizeof(tags) / sizeof(tags[0]);
16064
16065  for (int j = 0; j < tag_count; j++) {
16066    for (int i = 0; i < tag_count; i++) {
16067      SETUP();
16068      uint64_t src_base = reinterpret_cast<uint64_t>(src);
16069      uint64_t src_tagged = CPU::SetPointerTag(src_base, tags[i]);
16070      uint64_t offset_tagged = CPU::SetPointerTag(UINT64_C(0), tags[j]);
16071
16072      START();
16073      __ Mov(x10, src_tagged);
16074      __ Mov(x11, offset_tagged);
16075      __ Ld1(v0.V16B(), MemOperand(x10, x11, PostIndex));
16076      // TODO: add other instructions (ld2-4, st1-4) as they become available.
16077      END();
16078
16079      RUN();
16080
16081      ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
16082      ASSERT_EQUAL_64(src_tagged + offset_tagged, x10);
16083
16084      TEARDOWN();
16085    }
16086  }
16087}
16088
16089
16090TEST(branch_tagged) {
16091  SETUP();
16092  START();
16093
16094  Label loop, loop_entry, done;
16095  __ Adr(x0, &loop);
16096  __ Mov(x1, 0);
16097  __ B(&loop_entry);
16098
16099  __ Bind(&loop);
16100  __ Add(x1, x1, 1);  // Count successful jumps.
16101
16102  // Advance to the next tag, then bail out if we've come back around to tag 0.
16103  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
16104  __ Tst(x0, kAddressTagMask);
16105  __ B(eq, &done);
16106
16107  __ Bind(&loop_entry);
16108  __ Br(x0);
16109
16110  __ Bind(&done);
16111
16112  END();
16113  RUN();
16114
16115  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);
16116
16117  TEARDOWN();
16118}
16119
16120
16121TEST(branch_and_link_tagged) {
16122  SETUP();
16123  START();
16124
16125  Label loop, loop_entry, done;
16126  __ Adr(x0, &loop);
16127  __ Mov(x1, 0);
16128  __ B(&loop_entry);
16129
16130  __ Bind(&loop);
16131
16132  // Bail out (before counting a successful jump) if lr appears to be tagged.
16133  __ Tst(lr, kAddressTagMask);
16134  __ B(ne, &done);
16135
16136  __ Add(x1, x1, 1);  // Count successful jumps.
16137
16138  // Advance to the next tag, then bail out if we've come back around to tag 0.
16139  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
16140  __ Tst(x0, kAddressTagMask);
16141  __ B(eq, &done);
16142
16143  __ Bind(&loop_entry);
16144  __ Blr(x0);
16145
16146  __ Bind(&done);
16147
16148  END();
16149  RUN();
16150
16151  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);
16152
16153  TEARDOWN();
16154}
16155
16156
16157TEST(branch_tagged_and_adr_adrp) {
16158  SETUP_CUSTOM(BUF_SIZE, PageOffsetDependentCode);
16159  START();
16160
16161  Label loop, loop_entry, done;
16162  __ Adr(x0, &loop);
16163  __ Mov(x1, 0);
16164  __ B(&loop_entry);
16165
16166  __ Bind(&loop);
16167
16168  // Bail out (before counting a successful jump) if `adr x10, ...` is tagged.
16169  __ Adr(x10, &done);
16170  __ Tst(x10, kAddressTagMask);
16171  __ B(ne, &done);
16172
16173  // Bail out (before counting a successful jump) if `adrp x11, ...` is tagged.
16174  __ Adrp(x11, &done);
16175  __ Tst(x11, kAddressTagMask);
16176  __ B(ne, &done);
16177
16178  __ Add(x1, x1, 1);  // Count successful iterations.
16179
16180  // Advance to the next tag, then bail out if we've come back around to tag 0.
16181  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
16182  __ Tst(x0, kAddressTagMask);
16183  __ B(eq, &done);
16184
16185  __ Bind(&loop_entry);
16186  __ Br(x0);
16187
16188  __ Bind(&done);
16189
16190  END();
16191  RUN();
16192
16193  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);
16194
16195  TEARDOWN_CUSTOM();
16196}
16197
16198TEST(neon_3same_addp) {
16199  SETUP();
16200
16201  START();
16202
16203  __ Movi(v0.V2D(),  0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
16204  __ Movi(v1.V2D(),  0x000055aaff55ff00, 0xaa55ff55555500ff);
16205  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());
16206
16207  END();
16208
16209  RUN();
16210  ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
16211  TEARDOWN();
16212}
16213
16214TEST(neon_3same_sqdmulh_sqrdmulh) {
16215  SETUP();
16216
16217  START();
16218
16219  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
16220  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
16221  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
16222  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
16223
16224  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
16225  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
16226  __ Sqdmulh(h18, h0, h1);
16227  __ Sqdmulh(s19, s2, s3);
16228
16229  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
16230  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
16231  __ Sqrdmulh(h22, h0, h1);
16232  __ Sqrdmulh(s23, s2, s3);
16233
16234  END();
16235
16236  RUN();
16237  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
16238  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
16239  ASSERT_EQUAL_128(0, 0x7fff, q18);
16240  ASSERT_EQUAL_128(0, 0x7fffffff, q19);
16241  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
16242  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
16243  ASSERT_EQUAL_128(0, 0x7fff, q22);
16244  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
16245  TEARDOWN();
16246}
16247
16248TEST(neon_byelement_sqdmulh_sqrdmulh) {
16249  SETUP();
16250
16251  START();
16252
16253  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
16254  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
16255  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
16256  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
16257
16258  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
16259  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
16260  __ Sqdmulh(h18, h0, v1.H(), 0);
16261  __ Sqdmulh(s19, s2, v3.S(), 0);
16262
16263  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
16264  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
16265  __ Sqrdmulh(h22, h0, v1.H(), 0);
16266  __ Sqrdmulh(s23, s2, v3.S(), 0);
16267
16268  END();
16269
16270  RUN();
16271  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
16272  ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
16273  ASSERT_EQUAL_128(0, 0x7fff, q18);
16274  ASSERT_EQUAL_128(0, 0x7fffffff, q19);
16275  ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
16276  ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
16277  ASSERT_EQUAL_128(0, 0x7fff, q22);
16278  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
16279  TEARDOWN();
16280}
16281
16282
16283TEST(neon_2regmisc_saddlp) {
16284  SETUP();
16285
16286  START();
16287
16288  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
16289
16290  __ Saddlp(v16.V8H(), v0.V16B());
16291  __ Saddlp(v17.V4H(), v0.V8B());
16292
16293  __ Saddlp(v18.V4S(), v0.V8H());
16294  __ Saddlp(v19.V2S(), v0.V4H());
16295
16296  __ Saddlp(v20.V2D(), v0.V4S());
16297  __ Saddlp(v21.V1D(), v0.V2S());
16298
16299  END();
16300
16301  RUN();
16302  ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
16303  ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
16304  ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
16305  ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
16306  ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
16307  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
16308  TEARDOWN();
16309}
16310
16311TEST(neon_2regmisc_uaddlp) {
16312  SETUP();
16313
16314  START();
16315
16316  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
16317
16318  __ Uaddlp(v16.V8H(), v0.V16B());
16319  __ Uaddlp(v17.V4H(), v0.V8B());
16320
16321  __ Uaddlp(v18.V4S(), v0.V8H());
16322  __ Uaddlp(v19.V2S(), v0.V4H());
16323
16324  __ Uaddlp(v20.V2D(), v0.V4S());
16325  __ Uaddlp(v21.V1D(), v0.V2S());
16326
16327  END();
16328
16329  RUN();
16330  ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
16331  ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
16332  ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
16333  ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
16334  ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
16335  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
16336  TEARDOWN();
16337}
16338
16339TEST(neon_2regmisc_sadalp) {
16340  SETUP();
16341
16342  START();
16343
16344  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
16345  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
16346  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
16347  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
16348  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
16349
16350  __ Mov(v16.V16B(), v1.V16B());
16351  __ Mov(v17.V16B(), v1.V16B());
16352  __ Sadalp(v16.V8H(), v0.V16B());
16353  __ Sadalp(v17.V4H(), v0.V8B());
16354
16355  __ Mov(v18.V16B(), v2.V16B());
16356  __ Mov(v19.V16B(), v2.V16B());
16357  __ Sadalp(v18.V4S(), v1.V8H());
16358  __ Sadalp(v19.V2S(), v1.V4H());
16359
16360  __ Mov(v20.V16B(), v3.V16B());
16361  __ Mov(v21.V16B(), v4.V16B());
16362  __ Sadalp(v20.V2D(), v2.V4S());
16363  __ Sadalp(v21.V1D(), v2.V2S());
16364
16365  END();
16366
16367  RUN();
16368  ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
16369  ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
16370  ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
16371  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
16372  ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
16373  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
16374  TEARDOWN();
16375}
16376
16377TEST(neon_2regmisc_uadalp) {
16378  SETUP();
16379
16380  START();
16381
16382  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
16383  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
16384  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
16385  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
16386  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
16387
16388  __ Mov(v16.V16B(), v1.V16B());
16389  __ Mov(v17.V16B(), v1.V16B());
16390  __ Uadalp(v16.V8H(), v0.V16B());
16391  __ Uadalp(v17.V4H(), v0.V8B());
16392
16393  __ Mov(v18.V16B(), v2.V16B());
16394  __ Mov(v19.V16B(), v2.V16B());
16395  __ Uadalp(v18.V4S(), v1.V8H());
16396  __ Uadalp(v19.V2S(), v1.V4H());
16397
16398  __ Mov(v20.V16B(), v3.V16B());
16399  __ Mov(v21.V16B(), v4.V16B());
16400  __ Uadalp(v20.V2D(), v2.V4S());
16401  __ Uadalp(v21.V1D(), v2.V2S());
16402
16403  END();
16404
16405  RUN();
16406  ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
16407  ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
16408  ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
16409  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
16410  ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
16411  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
16412  TEARDOWN();
16413}
16414
16415TEST(neon_3same_mul) {
16416  SETUP();
16417
16418  START();
16419
16420  __ Movi(v0.V2D(),  0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
16421  __ Movi(v1.V2D(),  0x000055aaff55ff00, 0xaa55ff55555500ff);
16422  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16423  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16424
16425  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
16426  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
16427  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());
16428
16429  END();
16430
16431  RUN();
16432  ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
16433  ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
16434  ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
16435  TEARDOWN();
16436}
16437
16438
16439
16440TEST(neon_3same_absdiff) {
16441  SETUP();
16442
16443  START();
16444
16445  __ Movi(v0.V2D(),  0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
16446  __ Movi(v1.V2D(),  0x000055aaff55ff00, 0xaa55ff55555500ff);
16447  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16448  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16449
16450  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
16451  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
16452  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
16453  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());
16454
16455  END();
16456
16457  RUN();
16458  ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
16459  ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
16460  ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
16461  ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
16462  TEARDOWN();
16463}
16464
16465
16466TEST(neon_byelement_mul) {
16467  SETUP();
16468
16469  START();
16470
16471  __ Movi(v0.V2D(),  0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
16472  __ Movi(v1.V2D(),  0x000155aaff55ff00, 0xaa55ff55555500ff);
16473
16474
16475  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
16476  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
16477  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
16478  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);
16479
16480  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
16481  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
16482  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
16483  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);
16484
16485  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
16486  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
16487  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
16488  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);
16489
16490  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
16491  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
16492  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
16493  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);
16494
16495  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
16496  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
16497  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
16498  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);
16499
16500  END();
16501
16502  RUN();
16503  ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
16504  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
16505  ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
16506  ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);
16507
16508  ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
16509  ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
16510  ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
16511  ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);
16512
16513  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
16514  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
16515  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
16516  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
16517  TEARDOWN();
16518}
16519
16520
16521TEST(neon_byelement_mull) {
16522  SETUP();
16523
16524  START();
16525
16526  __ Movi(v0.V2D(),  0xaa55ff55555500ff, 0xff00aa5500ff55aa);
16527  __ Movi(v1.V2D(),  0x000155aaff55ff00, 0xaa55ff55555500ff);
16528
16529
16530  __ Smull(v16.V4S(),  v0.V4H(), v1.H(), 7);
16531  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
16532  __ Umull(v18.V4S(),  v0.V4H(), v1.H(), 7);
16533  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);
16534
16535  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
16536  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
16537  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
16538  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);
16539
16540  __ Smlal(v20.V4S(),  v0.V4H(), v1.H(), 7);
16541  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
16542  __ Umlal(v22.V4S(),  v0.V4H(), v1.H(), 7);
16543  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);
16544
16545  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
16546  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
16547  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
16548  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);
16549
16550  __ Smlsl(v24.V4S(),  v0.V4H(), v1.H(), 7);
16551  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
16552  __ Umlsl(v26.V4S(),  v0.V4H(), v1.H(), 7);
16553  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);
16554
16555  END();
16556
16557  RUN();
16558
16559  ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
16560  ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
16561  ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
16562  ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);
16563
16564  ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
16565  ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
16566  ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
16567  ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);
16568
16569  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
16570  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
16571  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
16572  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
16573
16574  TEARDOWN();
16575}
16576
16577
16578TEST(neon_byelement_sqdmull) {
16579  SETUP();
16580
16581  START();
16582
16583  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
16584  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
16585
16586  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
16587  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
16588  __ Sqdmull(s18, h0, v1.H(), 7);
16589
16590  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
16591  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
16592  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
16593
16594  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
16595  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
16596  __ Sqdmlal(s22, h0, v1.H(), 7);
16597
16598  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
16599  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
16600  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);
16601
16602  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
16603  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
16604  __ Sqdmlsl(s26, h0, v1.H(), 7);
16605
16606  END();
16607
16608  RUN();
16609
16610  ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
16611  ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
16612  ASSERT_EQUAL_128(0, 0x0000ab54, q18);
16613
16614  ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
16615  ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
16616  ASSERT_EQUAL_128(0, 0x0000ab55, q22);
16617
16618  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
16619  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
16620  ASSERT_EQUAL_128(0, 0x00000000, q26);
16621
16622  TEARDOWN();
16623}
16624
16625
16626TEST(neon_3diff_absdiff) {
16627  SETUP();
16628
16629  START();
16630
16631  __ Movi(v0.V2D(),  0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
16632  __ Movi(v1.V2D(),  0x000055aaff55ff00, 0xaa55ff55555500ff);
16633  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16634  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16635  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16636  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16637
16638  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
16639  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
16640  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
16641  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());
16642
16643  END();
16644
16645  RUN();
16646  ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
16647  ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
16648  ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
16649  ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
16650  TEARDOWN();
16651}
16652
16653
16654TEST(neon_3diff_sqdmull) {
16655  SETUP();
16656
16657  START();
16658
16659  __ Movi(v0.V2D(),  0x7fff7fff80008000, 0x80007fff7fff8000);
16660  __ Movi(v1.V2D(),  0x80007fff7fff8000, 0x7fff7fff80008000);
16661  __ Movi(v2.V2D(),  0x800000007fffffff, 0x7fffffff80000000);
16662  __ Movi(v3.V2D(),  0x8000000080000000, 0x8000000080000000);
16663
16664  __ Sqdmull(v16.V4S(),  v0.V4H(), v1.V4H());
16665  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
16666  __ Sqdmull(v18.V2D(),  v2.V2S(), v3.V2S());
16667  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
16668  __ Sqdmull(s20, h0, h1);
16669  __ Sqdmull(d21, s2, s3);
16670
16671  END();
16672
16673  RUN();
16674  ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
16675  ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
16676  ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
16677  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
16678  ASSERT_EQUAL_128(0, 0x7fffffff, q20);
16679  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
16680  TEARDOWN();
16681}
16682
16683
16684TEST(neon_3diff_sqdmlal) {
16685  SETUP();
16686
16687  START();
16688
16689  __ Movi(v0.V2D(),  0x7fff7fff80008000, 0x80007fff7fff8000);
16690  __ Movi(v1.V2D(),  0x80007fff7fff8000, 0x7fff7fff80008000);
16691  __ Movi(v2.V2D(),  0x800000007fffffff, 0x7fffffff80000000);
16692  __ Movi(v3.V2D(),  0x8000000080000000, 0x8000000080000000);
16693
16694  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
16695  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
16696  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
16697  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
16698  __ Movi(v20.V2D(), 0, 0x00000001);
16699  __ Movi(v21.V2D(), 0, 0x00000001);
16700
16701  __ Sqdmlal(v16.V4S(),  v0.V4H(), v1.V4H());
16702  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
16703  __ Sqdmlal(v18.V2D(),  v2.V2S(), v3.V2S());
16704  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
16705  __ Sqdmlal(s20, h0, h1);
16706  __ Sqdmlal(d21, s2, s3);
16707
16708  END();
16709
16710  RUN();
16711  ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
16712  ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
16713  ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
16714  ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
16715  ASSERT_EQUAL_128(0, 0x7fffffff, q20);
16716  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
16717  TEARDOWN();
16718}
16719
16720
16721TEST(neon_3diff_sqdmlsl) {
16722  SETUP();
16723
16724  START();
16725
16726  __ Movi(v0.V2D(),  0x7fff7fff80008000, 0x80007fff7fff8000);
16727  __ Movi(v1.V2D(),  0x80007fff7fff8000, 0x7fff7fff80008000);
16728  __ Movi(v2.V2D(),  0x800000007fffffff, 0x7fffffff80000000);
16729  __ Movi(v3.V2D(),  0x8000000080000000, 0x8000000080000000);
16730
16731  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
16732  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
16733  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
16734  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
16735  __ Movi(v20.V2D(), 0, 0x00000001);
16736  __ Movi(v21.V2D(), 0, 0x00000001);
16737
16738  __ Sqdmlsl(v16.V4S(),  v0.V4H(), v1.V4H());
16739  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
16740  __ Sqdmlsl(v18.V2D(),  v2.V2S(), v3.V2S());
16741  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
16742  __ Sqdmlsl(s20, h0, h1);
16743  __ Sqdmlsl(d21, s2, s3);
16744
16745  END();
16746
16747  RUN();
16748  ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
16749  ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
16750  ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
16751  ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
16752  ASSERT_EQUAL_128(0, 0x80000002, q20);
16753  ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
16754
16755  TEARDOWN();
16756}
16757
16758
16759TEST(neon_3diff_mla) {
16760  SETUP();
16761
16762  START();
16763
16764  __ Movi(v0.V2D(),  0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
16765  __ Movi(v1.V2D(),  0x000055aaff55ff00, 0xaa55ff55555500ff);
16766  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16767  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16768  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16769  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16770
16771  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
16772  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
16773  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
16774  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());
16775
16776  END();
16777
16778  RUN();
16779  ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
16780  ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
16781  ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
16782  ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
16783  TEARDOWN();
16784}
16785
16786
16787TEST(neon_3diff_mls) {
16788  SETUP();
16789
16790  START();
16791
16792  __ Movi(v0.V2D(),  0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
16793  __ Movi(v1.V2D(),  0x000055aaff55ff00, 0xaa55ff55555500ff);
16794  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16795  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16796  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16797  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
16798
16799  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
16800  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
16801  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
16802  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());
16803
16804  END();
16805
16806  RUN();
16807  ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
16808  ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
16809  ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
16810  ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
16811  TEARDOWN();
16812}
16813
16814
16815TEST(neon_3same_compare) {
16816  SETUP();
16817
16818  START();
16819
16820  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
16821  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
16822
16823  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
16824  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
16825  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
16826  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
16827  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
16828  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
16829  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
16830  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
16831  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
16832  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());
16833
16834  END();
16835
16836  RUN();
16837  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
16838  ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
16839  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
16840  ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
16841  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
16842  ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
16843  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
16844  ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
16845  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
16846  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
16847  TEARDOWN();
16848}
16849
16850
16851TEST(neon_3same_scalar_compare) {
16852  SETUP();
16853
16854  START();
16855
16856  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
16857  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
16858
16859  __ Cmeq(d16, d0, d0);
16860  __ Cmeq(d17, d0, d1);
16861  __ Cmeq(d18, d1, d0);
16862  __ Cmge(d19, d0, d0);
16863  __ Cmge(d20, d0, d1);
16864  __ Cmge(d21, d1, d0);
16865  __ Cmgt(d22, d0, d0);
16866  __ Cmgt(d23, d0, d1);
16867  __ Cmhi(d24, d0, d0);
16868  __ Cmhi(d25, d0, d1);
16869  __ Cmhs(d26, d0, d0);
16870  __ Cmhs(d27, d0, d1);
16871  __ Cmhs(d28, d1, d0);
16872
16873  END();
16874
16875  RUN();
16876
16877  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
16878  ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
16879  ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
16880  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
16881  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
16882  ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
16883  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
16884  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
16885  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
16886  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
16887  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
16888  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
16889  ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
16890
16891  TEARDOWN();
16892}
16893
16894TEST(neon_2regmisc_fcmeq) {
16895  SETUP();
16896
16897  START();
16898
16899  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
16900  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
16901  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
16902  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.
16903
16904  __ Fcmeq(s16, s0, 0.0);
16905  __ Fcmeq(s17, s1, 0.0);
16906  __ Fcmeq(s18, s2, 0.0);
16907  __ Fcmeq(d19, d0, 0.0);
16908  __ Fcmeq(d20, d1, 0.0);
16909  __ Fcmeq(d21, d2, 0.0);
16910  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
16911  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
16912  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
16913  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);
16914
16915  END();
16916
16917  RUN();
16918  ASSERT_EQUAL_128(0, 0xffffffff, q16);
16919  ASSERT_EQUAL_128(0, 0x00000000, q17);
16920  ASSERT_EQUAL_128(0, 0x00000000, q18);
16921  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
16922  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
16923  ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
16924  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
16925  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
16926  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
16927  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
16928  TEARDOWN();
16929}
16930
16931TEST(neon_2regmisc_fcmge) {
16932  SETUP();
16933
16934  START();
16935
16936  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
16937  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
16938  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
16939  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.
16940
16941  __ Fcmge(s16, s0, 0.0);
16942  __ Fcmge(s17, s1, 0.0);
16943  __ Fcmge(s18, s2, 0.0);
16944  __ Fcmge(d19, d0, 0.0);
16945  __ Fcmge(d20, d1, 0.0);
16946  __ Fcmge(d21, d3, 0.0);
16947  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
16948  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
16949  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
16950  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);
16951
16952  END();
16953
16954  RUN();
16955  ASSERT_EQUAL_128(0, 0xffffffff, q16);
16956  ASSERT_EQUAL_128(0, 0x00000000, q17);
16957  ASSERT_EQUAL_128(0, 0x00000000, q18);
16958  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
16959  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
16960  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
16961  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
16962  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
16963  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
16964  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
16965  TEARDOWN();
16966}
16967
16968
16969TEST(neon_2regmisc_fcmgt) {
16970  SETUP();
16971
16972  START();
16973
16974  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
16975  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
16976  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
16977  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.
16978
16979  __ Fcmgt(s16, s0, 0.0);
16980  __ Fcmgt(s17, s1, 0.0);
16981  __ Fcmgt(s18, s2, 0.0);
16982  __ Fcmgt(d19, d0, 0.0);
16983  __ Fcmgt(d20, d1, 0.0);
16984  __ Fcmgt(d21, d3, 0.0);
16985  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
16986  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
16987  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
16988  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);
16989
16990  END();
16991
16992  RUN();
16993  ASSERT_EQUAL_128(0, 0x00000000, q16);
16994  ASSERT_EQUAL_128(0, 0x00000000, q17);
16995  ASSERT_EQUAL_128(0, 0x00000000, q18);
16996  ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
16997  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
16998  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
16999  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
17000  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
17001  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
17002  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
17003  TEARDOWN();
17004}
17005
17006TEST(neon_2regmisc_fcmle) {
17007  SETUP();
17008
17009  START();
17010
17011  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
17012  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
17013  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
17014  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.
17015
17016  __ Fcmle(s16, s0, 0.0);
17017  __ Fcmle(s17, s1, 0.0);
17018  __ Fcmle(s18, s3, 0.0);
17019  __ Fcmle(d19, d0, 0.0);
17020  __ Fcmle(d20, d1, 0.0);
17021  __ Fcmle(d21, d2, 0.0);
17022  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
17023  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
17024  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
17025  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);
17026
17027  END();
17028
17029  RUN();
17030  ASSERT_EQUAL_128(0, 0xffffffff, q16);
17031  ASSERT_EQUAL_128(0, 0x00000000, q17);
17032  ASSERT_EQUAL_128(0, 0x00000000, q18);
17033  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
17034  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
17035  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
17036  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
17037  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
17038  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
17039  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
17040  TEARDOWN();
17041}
17042
17043
17044TEST(neon_2regmisc_fcmlt) {
17045  SETUP();
17046
17047  START();
17048
17049  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
17050  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
17051  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
17052  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.
17053
17054  __ Fcmlt(s16, s0, 0.0);
17055  __ Fcmlt(s17, s1, 0.0);
17056  __ Fcmlt(s18, s3, 0.0);
17057  __ Fcmlt(d19, d0, 0.0);
17058  __ Fcmlt(d20, d1, 0.0);
17059  __ Fcmlt(d21, d2, 0.0);
17060  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
17061  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
17062  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
17063  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);
17064
17065  END();
17066
17067  RUN();
17068  ASSERT_EQUAL_128(0, 0x00000000, q16);
17069  ASSERT_EQUAL_128(0, 0x00000000, q17);
17070  ASSERT_EQUAL_128(0, 0x00000000, q18);
17071  ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
17072  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
17073  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
17074  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
17075  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
17076  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
17077  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
17078  TEARDOWN();
17079}
17080
17081TEST(neon_2regmisc_cmeq) {
17082  SETUP();
17083
17084  START();
17085
17086  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
17087  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
17088
17089  __ Cmeq(v16.V8B(),  v1.V8B(),  0);
17090  __ Cmeq(v17.V16B(), v1.V16B(), 0);
17091  __ Cmeq(v18.V4H(),  v1.V4H(),  0);
17092  __ Cmeq(v19.V8H(),  v1.V8H(),  0);
17093  __ Cmeq(v20.V2S(),  v0.V2S(),  0);
17094  __ Cmeq(v21.V4S(),  v0.V4S(),  0);
17095  __ Cmeq(d22,  d0,  0);
17096  __ Cmeq(d23,  d1,  0);
17097  __ Cmeq(v24.V2D(),  v0.V2D(),  0);
17098
17099  END();
17100
17101  RUN();
17102  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
17103  ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
17104  ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
17105  ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
17106  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
17107  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
17108  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
17109  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
17110  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
17111  TEARDOWN();
17112}
17113
17114
17115TEST(neon_2regmisc_cmge) {
17116  SETUP();
17117
17118  START();
17119
17120  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
17121  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
17122
17123  __ Cmge(v16.V8B(),  v1.V8B(),  0);
17124  __ Cmge(v17.V16B(), v1.V16B(), 0);
17125  __ Cmge(v18.V4H(),  v1.V4H(),  0);
17126  __ Cmge(v19.V8H(),  v1.V8H(),  0);
17127  __ Cmge(v20.V2S(),  v0.V2S(),  0);
17128  __ Cmge(v21.V4S(),  v0.V4S(),  0);
17129  __ Cmge(d22,  d0,  0);
17130  __ Cmge(d23,  d1,  0);
17131  __ Cmge(v24.V2D(),  v0.V2D(),  0);
17132
17133  END();
17134
17135  RUN();
17136  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
17137  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
17138  ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
17139  ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
17140  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
17141  ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
17142  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
17143  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
17144  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
17145  TEARDOWN();
17146}
17147
17148
17149TEST(neon_2regmisc_cmlt) {
17150  SETUP();
17151
17152  START();
17153
17154  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
17155  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
17156
17157  __ Cmlt(v16.V8B(),  v1.V8B(),  0);
17158  __ Cmlt(v17.V16B(), v1.V16B(), 0);
17159  __ Cmlt(v18.V4H(),  v1.V4H(),  0);
17160  __ Cmlt(v19.V8H(),  v1.V8H(),  0);
17161  __ Cmlt(v20.V2S(),  v1.V2S(),  0);
17162  __ Cmlt(v21.V4S(),  v1.V4S(),  0);
17163  __ Cmlt(d22,  d0,  0);
17164  __ Cmlt(d23,  d1,  0);
17165  __ Cmlt(v24.V2D(),  v0.V2D(),  0);
17166
17167  END();
17168
17169  RUN();
17170  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
17171  ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
17172  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
17173  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
17174  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
17175  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
17176  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
17177  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
17178  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
17179  TEARDOWN();
17180}
17181
17182
17183TEST(neon_2regmisc_cmle) {
17184  SETUP();
17185
17186  START();
17187
17188  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
17189  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
17190
17191  __ Cmle(v16.V8B(),  v1.V8B(),  0);
17192  __ Cmle(v17.V16B(), v1.V16B(), 0);
17193  __ Cmle(v18.V4H(),  v1.V4H(),  0);
17194  __ Cmle(v19.V8H(),  v1.V8H(),  0);
17195  __ Cmle(v20.V2S(),  v1.V2S(),  0);
17196  __ Cmle(v21.V4S(),  v1.V4S(),  0);
17197  __ Cmle(d22,  d0,  0);
17198  __ Cmle(d23,  d1,  0);
17199  __ Cmle(v24.V2D(),  v0.V2D(),  0);
17200
17201  END();
17202
17203  RUN();
17204  ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
17205  ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
17206  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
17207  ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
17208  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
17209  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
17210  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
17211  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
17212  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
17213  TEARDOWN();
17214}
17215
17216
17217TEST(neon_2regmisc_cmgt) {
17218  SETUP();
17219
17220  START();
17221
17222  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
17223  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
17224
17225  __ Cmgt(v16.V8B(),  v1.V8B(),  0);
17226  __ Cmgt(v17.V16B(), v1.V16B(), 0);
17227  __ Cmgt(v18.V4H(),  v1.V4H(),  0);
17228  __ Cmgt(v19.V8H(),  v1.V8H(),  0);
17229  __ Cmgt(v20.V2S(),  v0.V2S(),  0);
17230  __ Cmgt(v21.V4S(),  v0.V4S(),  0);
17231  __ Cmgt(d22,  d0,  0);
17232  __ Cmgt(d23,  d1,  0);
17233  __ Cmgt(v24.V2D(),  v0.V2D(),  0);
17234
17235  END();
17236
17237  RUN();
17238  ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
17239  ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
17240  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
17241  ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
17242  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
17243  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
17244  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
17245  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
17246  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
17247  TEARDOWN();
17248}
17249
17250
17251TEST(neon_2regmisc_neg) {
17252  SETUP();
17253
17254  START();
17255
17256  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
17257  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17258  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17259  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17260  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17261
17262  __ Neg(v16.V8B(),  v0.V8B());
17263  __ Neg(v17.V16B(), v0.V16B());
17264  __ Neg(v18.V4H(),  v1.V4H());
17265  __ Neg(v19.V8H(),  v1.V8H());
17266  __ Neg(v20.V2S(),  v2.V2S());
17267  __ Neg(v21.V4S(),  v2.V4S());
17268  __ Neg(d22, d3);
17269  __ Neg(v23.V2D(),  v3.V2D());
17270  __ Neg(v24.V2D(),  v4.V2D());
17271
17272  END();
17273
17274  RUN();
17275  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
17276  ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
17277  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
17278  ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
17279  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
17280  ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
17281  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
17282  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
17283  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
17284
17285  TEARDOWN();
17286}
17287
17288
17289TEST(neon_2regmisc_sqneg) {
17290  SETUP();
17291
17292  START();
17293
17294  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
17295  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17296  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17297  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17298  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17299
17300  __ Sqneg(v16.V8B(),  v0.V8B());
17301  __ Sqneg(v17.V16B(), v0.V16B());
17302  __ Sqneg(v18.V4H(),  v1.V4H());
17303  __ Sqneg(v19.V8H(),  v1.V8H());
17304  __ Sqneg(v20.V2S(),  v2.V2S());
17305  __ Sqneg(v21.V4S(),  v2.V4S());
17306  __ Sqneg(v22.V2D(),  v3.V2D());
17307  __ Sqneg(v23.V2D(),  v4.V2D());
17308
17309  __ Sqneg(b24, b0);
17310  __ Sqneg(h25, h1);
17311  __ Sqneg(s26, s2);
17312  __ Sqneg(d27, d3);
17313
17314  END();
17315
17316  RUN();
17317  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
17318  ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
17319  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
17320  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
17321  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
17322  ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
17323  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
17324  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);
17325
17326  ASSERT_EQUAL_128(0, 0x7f, q24);
17327  ASSERT_EQUAL_128(0, 0x8001, q25);
17328  ASSERT_EQUAL_128(0, 0x80000001, q26);
17329  ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
17330
17331  TEARDOWN();
17332}
17333
17334
17335TEST(neon_2regmisc_abs) {
17336  SETUP();
17337
17338  START();
17339
17340  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
17341  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17342  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17343  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17344  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17345
17346  __ Abs(v16.V8B(),  v0.V8B());
17347  __ Abs(v17.V16B(), v0.V16B());
17348  __ Abs(v18.V4H(),  v1.V4H());
17349  __ Abs(v19.V8H(),  v1.V8H());
17350  __ Abs(v20.V2S(),  v2.V2S());
17351  __ Abs(v21.V4S(),  v2.V4S());
17352  __ Abs(d22, d3);
17353  __ Abs(v23.V2D(),  v3.V2D());
17354  __ Abs(v24.V2D(),  v4.V2D());
17355
17356  END();
17357
17358  RUN();
17359  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
17360  ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
17361  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
17362  ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
17363  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
17364  ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
17365  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
17366  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
17367  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
17368
17369  TEARDOWN();
17370}
17371
17372
17373TEST(neon_2regmisc_sqabs) {
17374  SETUP();
17375
17376  START();
17377
17378  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
17379  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17380  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17381  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17382  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17383
17384  __ Sqabs(v16.V8B(),  v0.V8B());
17385  __ Sqabs(v17.V16B(), v0.V16B());
17386  __ Sqabs(v18.V4H(),  v1.V4H());
17387  __ Sqabs(v19.V8H(),  v1.V8H());
17388  __ Sqabs(v20.V2S(),  v2.V2S());
17389  __ Sqabs(v21.V4S(),  v2.V4S());
17390  __ Sqabs(v22.V2D(),  v3.V2D());
17391  __ Sqabs(v23.V2D(),  v4.V2D());
17392
17393  __ Sqabs(b24, b0);
17394  __ Sqabs(h25, h1);
17395  __ Sqabs(s26, s2);
17396  __ Sqabs(d27, d3);
17397
17398  END();
17399
17400  RUN();
17401  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
17402  ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
17403  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
17404  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
17405  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
17406  ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
17407  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
17408  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);
17409
17410  ASSERT_EQUAL_128(0, 0x7f, q24);
17411  ASSERT_EQUAL_128(0, 0x7fff, q25);
17412  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
17413  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
17414
17415  TEARDOWN();
17416}
17417
17418TEST(neon_2regmisc_suqadd) {
17419  SETUP();
17420
17421  START();
17422
17423  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
17424  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);
17425
17426  __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
17427  __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);
17428
17429  __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
17430  __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);
17431
17432  __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17433  __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);
17434
17435  __ Mov(v16.V2D(), v0.V2D());
17436  __ Mov(v17.V2D(), v0.V2D());
17437  __ Mov(v18.V2D(), v2.V2D());
17438  __ Mov(v19.V2D(), v2.V2D());
17439  __ Mov(v20.V2D(), v4.V2D());
17440  __ Mov(v21.V2D(), v4.V2D());
17441  __ Mov(v22.V2D(), v6.V2D());
17442
17443  __ Mov(v23.V2D(), v0.V2D());
17444  __ Mov(v24.V2D(), v2.V2D());
17445  __ Mov(v25.V2D(), v4.V2D());
17446  __ Mov(v26.V2D(), v6.V2D());
17447
17448  __ Suqadd(v16.V8B(), v1.V8B());
17449  __ Suqadd(v17.V16B(), v1.V16B());
17450  __ Suqadd(v18.V4H(), v3.V4H());
17451  __ Suqadd(v19.V8H(), v3.V8H());
17452  __ Suqadd(v20.V2S(), v5.V2S());
17453  __ Suqadd(v21.V4S(), v5.V4S());
17454  __ Suqadd(v22.V2D(), v7.V2D());
17455
17456  __ Suqadd(b23, b1);
17457  __ Suqadd(h24, h3);
17458  __ Suqadd(s25, s5);
17459  __ Suqadd(d26, d7);
17460
17461  END();
17462
17463  RUN();
17464  ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
17465  ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
17466  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
17467  ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
17468  ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
17469  ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
17470  ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);
17471
17472  ASSERT_EQUAL_128(0, 0x7f, q23);
17473  ASSERT_EQUAL_128(0, 0x7ffe, q24);
17474  ASSERT_EQUAL_128(0, 0x7fffffff, q25);
17475  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
17476  TEARDOWN();
17477}
17478
17479TEST(neon_2regmisc_usqadd) {
17480  SETUP();
17481
17482  START();
17483
17484  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
17485  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);
17486
17487  __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
17488  __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);
17489
17490  __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
17491  __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);
17492
17493  __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
17494  __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);
17495
17496  __ Mov(v16.V2D(), v0.V2D());
17497  __ Mov(v17.V2D(), v0.V2D());
17498  __ Mov(v18.V2D(), v2.V2D());
17499  __ Mov(v19.V2D(), v2.V2D());
17500  __ Mov(v20.V2D(), v4.V2D());
17501  __ Mov(v21.V2D(), v4.V2D());
17502  __ Mov(v22.V2D(), v6.V2D());
17503
17504  __ Mov(v23.V2D(), v0.V2D());
17505  __ Mov(v24.V2D(), v2.V2D());
17506  __ Mov(v25.V2D(), v4.V2D());
17507  __ Mov(v26.V2D(), v6.V2D());
17508
17509  __ Usqadd(v16.V8B(), v1.V8B());
17510  __ Usqadd(v17.V16B(), v1.V16B());
17511  __ Usqadd(v18.V4H(), v3.V4H());
17512  __ Usqadd(v19.V8H(), v3.V8H());
17513  __ Usqadd(v20.V2S(), v5.V2S());
17514  __ Usqadd(v21.V4S(), v5.V4S());
17515  __ Usqadd(v22.V2D(), v7.V2D());
17516
17517  __ Usqadd(b23, b1);
17518  __ Usqadd(h24, h3);
17519  __ Usqadd(s25, s5);
17520  __ Usqadd(d26, d7);
17521
17522  END();
17523
17524  RUN();
17525  ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
17526  ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
17527  ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
17528  ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
17529  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
17530  ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
17531  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);
17532
17533  ASSERT_EQUAL_128(0, 0xff, q23);
17534  ASSERT_EQUAL_128(0, 0x7ffe, q24);
17535  ASSERT_EQUAL_128(0, 0xffffffff, q25);
17536  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
17537  TEARDOWN();
17538}
17539
17540
17541TEST(system_sys) {
17542  SETUP();
17543  const char* msg = "SYS test!";
17544  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);
17545
17546  START();
17547  __ Mov(x4, msg_addr);
17548  __ Sys(3, 0x7, 0x5, 1, x4);
17549  __ Mov(x3, x4);
17550  __ Sys(3, 0x7, 0xa, 1, x3);
17551  __ Mov(x2, x3);
17552  __ Sys(3, 0x7, 0xb, 1, x2);
17553  __ Mov(x1, x2);
17554  __ Sys(3, 0x7, 0xe, 1, x1);
17555  // TODO: Add tests to check ZVA equivalent.
17556  END();
17557
17558  RUN();
17559
17560  TEARDOWN();
17561}
17562
17563
17564TEST(system_ic) {
17565  SETUP();
17566  const char* msg = "IC test!";
17567  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);
17568
17569  START();
17570  __ Mov(x11, msg_addr);
17571  __ Ic(IVAU, x11);
17572  END();
17573
17574  RUN();
17575
17576  TEARDOWN();
17577}
17578
17579
17580TEST(system_dc) {
17581  SETUP();
17582  const char* msg = "DC test!";
17583  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);
17584
17585  START();
17586  __ Mov(x20, msg_addr);
17587  __ Dc(CVAC, x20);
17588  __ Mov(x21, x20);
17589  __ Dc(CVAU, x21);
17590  __ Mov(x22, x21);
17591  __ Dc(CIVAC, x22);
17592  // TODO: Add tests to check ZVA.
17593  END();
17594
17595  RUN();
17596
17597  TEARDOWN();
17598}
17599
17600
17601TEST(neon_2regmisc_xtn) {
17602  SETUP();
17603
17604  START();
17605
17606  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
17607  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17608  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17609  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17610  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17611
17612  __ Xtn(v16.V8B(),   v0.V8H());
17613  __ Xtn2(v16.V16B(), v1.V8H());
17614  __ Xtn(v17.V4H(),   v1.V4S());
17615  __ Xtn2(v17.V8H(),  v2.V4S());
17616  __ Xtn(v18.V2S(),   v3.V2D());
17617  __ Xtn2(v18.V4S(),  v4.V2D());
17618
17619  END();
17620
17621  RUN();
17622  ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
17623  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
17624  ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
17625  TEARDOWN();
17626}
17627
17628
17629TEST(neon_2regmisc_sqxtn) {
17630  SETUP();
17631
17632  START();
17633
17634  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
17635  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17636  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17637  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17638  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17639
17640  __ Sqxtn(v16.V8B(),   v0.V8H());
17641  __ Sqxtn2(v16.V16B(), v1.V8H());
17642  __ Sqxtn(v17.V4H(),   v1.V4S());
17643  __ Sqxtn2(v17.V8H(),  v2.V4S());
17644  __ Sqxtn(v18.V2S(),   v3.V2D());
17645  __ Sqxtn2(v18.V4S(),  v4.V2D());
17646  __ Sqxtn(b19,  h0);
17647  __ Sqxtn(h20,  s0);
17648  __ Sqxtn(s21,  d0);
17649
17650  END();
17651
17652  RUN();
17653  ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
17654  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
17655  ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
17656  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
17657  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
17658  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
17659  TEARDOWN();
17660}
17661
17662
17663TEST(neon_2regmisc_uqxtn) {
17664  SETUP();
17665
17666  START();
17667
17668  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
17669  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17670  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17671  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17672  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17673
17674  __ Uqxtn(v16.V8B(),   v0.V8H());
17675  __ Uqxtn2(v16.V16B(), v1.V8H());
17676  __ Uqxtn(v17.V4H(),   v1.V4S());
17677  __ Uqxtn2(v17.V8H(),  v2.V4S());
17678  __ Uqxtn(v18.V2S(),   v3.V2D());
17679  __ Uqxtn2(v18.V4S(),  v4.V2D());
17680  __ Uqxtn(b19,  h0);
17681  __ Uqxtn(h20,  s0);
17682  __ Uqxtn(s21,  d0);
17683
17684  END();
17685
17686  RUN();
17687  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
17688  ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
17689  ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
17690  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
17691  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
17692  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
17693  TEARDOWN();
17694}
17695
17696
17697TEST(neon_2regmisc_sqxtun) {
17698  SETUP();
17699
17700  START();
17701
17702  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
17703  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
17704  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
17705  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
17706  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
17707
17708  __ Sqxtun(v16.V8B(),   v0.V8H());
17709  __ Sqxtun2(v16.V16B(), v1.V8H());
17710  __ Sqxtun(v17.V4H(),   v1.V4S());
17711  __ Sqxtun2(v17.V8H(),  v2.V4S());
17712  __ Sqxtun(v18.V2S(),   v3.V2D());
17713  __ Sqxtun2(v18.V4S(),  v4.V2D());
17714  __ Sqxtun(b19,  h0);
17715  __ Sqxtun(h20,  s0);
17716  __ Sqxtun(s21,  d0);
17717
17718  END();
17719
17720  RUN();
17721  ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
17722  ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
17723  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
17724  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
17725  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
17726  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
17727  TEARDOWN();
17728}
17729
17730TEST(neon_3same_and) {
17731  SETUP();
17732
17733  START();
17734
17735  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
17736  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
17737
17738  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
17739  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
17740  __ And(v24.V8B(), v0.V8B(), v0.V8B());  // self test
17741  __ And(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
17742  END();
17743
17744  RUN();
17745  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
17746  ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
17747  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
17748  ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
17749  TEARDOWN();
17750}
17751
17752TEST(neon_3same_bic) {
17753  SETUP();
17754
17755  START();
17756
17757  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
17758  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);
17759
17760  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
17761  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
17762  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());  // self test
17763  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
17764  END();
17765
17766  RUN();
17767  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
17768  ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
17769  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
17770  ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
17771  TEARDOWN();
17772}
17773
17774TEST(neon_3same_orr) {
17775  SETUP();
17776
17777  START();
17778
17779  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
17780  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
17781
17782  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
17783  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
17784  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());  // self test
17785  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
17786  END();
17787
17788  RUN();
17789  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
17790  ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
17791  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
17792  ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
17793  TEARDOWN();
17794}
17795
17796TEST(neon_3same_mov) {
17797  SETUP();
17798
17799  START();
17800
17801  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
17802
17803  __ Mov(v16.V16B(), v0.V16B());
17804  __ Mov(v17.V8H(), v0.V8H());
17805  __ Mov(v18.V4S(), v0.V4S());
17806  __ Mov(v19.V2D(), v0.V2D());
17807
17808  __ Mov(v24.V8B(), v0.V8B());
17809  __ Mov(v25.V4H(), v0.V4H());
17810  __ Mov(v26.V2S(), v0.V2S());
17811  END();
17812
17813  RUN();
17814
17815  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
17816  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
17817  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
17818  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);
17819
17820  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
17821  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
17822  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);
17823
17824  TEARDOWN();
17825}
17826
17827TEST(neon_3same_orn) {
17828  SETUP();
17829
17830  START();
17831
17832  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
17833  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
17834
17835  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
17836  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
17837  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());  // self test
17838  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
17839  END();
17840
17841  RUN();
17842  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
17843  ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
17844  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
17845  ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
17846  TEARDOWN();
17847}
17848
17849TEST(neon_3same_eor) {
17850  SETUP();
17851
17852  START();
17853
17854  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
17855  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);
17856
17857  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
17858  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
17859  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());  // self test
17860  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());  // all combinations
17861  END();
17862
17863  RUN();
17864  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
17865  ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
17866  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
17867  ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
17868  TEARDOWN();
17869}
17870
17871TEST(neon_3same_bif) {
17872  SETUP();
17873
17874  START();
17875
17876  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
17877  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
17878  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
17879
17880  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
17881  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
17882  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
17883
17884  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
17885  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
17886  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
17887
17888  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
17889  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
17890  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
17891  END();
17892
17893  RUN();
17894
17895  ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
17896  ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
17897  ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
17898  TEARDOWN();
17899}
17900
17901TEST(neon_3same_bit) {
17902  SETUP();
17903
17904  START();
17905
17906  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
17907  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
17908  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
17909
17910  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
17911  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
17912  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
17913
17914  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
17915  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
17916  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
17917
17918  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
17919  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
17920  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
17921  END();
17922
17923  RUN();
17924
17925  ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
17926  ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
17927  ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
17928  TEARDOWN();
17929}
17930
17931TEST(neon_3same_bsl) {
17932  SETUP();
17933
17934  START();
17935
17936  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
17937  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
17938  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
17939
17940  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
17941  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
17942  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
17943
17944  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
17945  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
17946  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
17947
17948  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
17949  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
17950  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
17951  END();
17952
17953  RUN();
17954
17955  ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
17956  ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
17957  ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
17958  TEARDOWN();
17959}
17960
17961
17962TEST(neon_3same_smax) {
17963  SETUP();
17964
17965  START();
17966
17967  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
17968  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
17969
17970  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
17971  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
17972  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());
17973
17974  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
17975  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
17976  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
17977  END();
17978
17979  RUN();
17980
17981  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
17982  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
17983  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
17984  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
17985  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
17986  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
17987  TEARDOWN();
17988}
17989
17990
17991TEST(neon_3same_smaxp) {
17992  SETUP();
17993
17994  START();
17995
17996  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
17997  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
17998
17999  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
18000  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
18001  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
18002
18003  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
18004  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
18005  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
18006  END();
18007
18008  RUN();
18009
18010  ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
18011  ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
18012  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
18013  ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
18014  ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
18015  ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
18016  TEARDOWN();
18017}
18018
18019
18020TEST(neon_addp_scalar) {
18021  SETUP();
18022
18023  START();
18024
18025  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
18026  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18027  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18028
18029  __ Addp(d16, v0.V2D());
18030  __ Addp(d17, v1.V2D());
18031  __ Addp(d18, v2.V2D());
18032
18033  END();
18034
18035  RUN();
18036
18037  ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
18038  ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
18039  ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
18040  TEARDOWN();
18041}
18042
18043TEST(neon_acrosslanes_addv) {
18044  SETUP();
18045
18046  START();
18047
18048  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
18049  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18050  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18051
18052  __ Addv(b16, v0.V8B());
18053  __ Addv(b17, v0.V16B());
18054  __ Addv(h18, v1.V4H());
18055  __ Addv(h19, v1.V8H());
18056  __ Addv(s20, v2.V4S());
18057
18058  END();
18059
18060  RUN();
18061
18062  ASSERT_EQUAL_128(0x0, 0xc7, q16);
18063  ASSERT_EQUAL_128(0x0, 0x99, q17);
18064  ASSERT_EQUAL_128(0x0, 0x55a9, q18);
18065  ASSERT_EQUAL_128(0x0, 0x55fc, q19);
18066  ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
18067  TEARDOWN();
18068}
18069
18070
18071TEST(neon_acrosslanes_saddlv) {
18072  SETUP();
18073
18074  START();
18075
18076  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
18077  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18078  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18079
18080  __ Saddlv(h16, v0.V8B());
18081  __ Saddlv(h17, v0.V16B());
18082  __ Saddlv(s18, v1.V4H());
18083  __ Saddlv(s19, v1.V8H());
18084  __ Saddlv(d20, v2.V4S());
18085
18086  END();
18087
18088  RUN();
18089
18090  ASSERT_EQUAL_128(0x0, 0xffc7, q16);
18091  ASSERT_EQUAL_128(0x0, 0xff99, q17);
18092  ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
18093  ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
18094  ASSERT_EQUAL_128(0x0, 0x0000001100a9fe, q20);
18095  TEARDOWN();
18096}
18097
18098
18099TEST(neon_acrosslanes_uaddlv) {
18100  SETUP();
18101
18102  START();
18103
18104  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
18105  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18106  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18107
18108  __ Uaddlv(h16, v0.V8B());
18109  __ Uaddlv(h17, v0.V16B());
18110  __ Uaddlv(s18, v1.V4H());
18111  __ Uaddlv(s19, v1.V8H());
18112  __ Uaddlv(d20, v2.V4S());
18113
18114  END();
18115
18116  RUN();
18117
18118  ASSERT_EQUAL_128(0x0, 0x02c7, q16);
18119  ASSERT_EQUAL_128(0x0, 0x0599, q17);
18120  ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
18121  ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
18122  ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
18123  TEARDOWN();
18124}
18125
18126
18127TEST(neon_acrosslanes_smaxv) {
18128  SETUP();
18129
18130  START();
18131
18132  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
18133  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18134  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18135
18136  __ Smaxv(b16, v0.V8B());
18137  __ Smaxv(b17, v0.V16B());
18138  __ Smaxv(h18, v1.V4H());
18139  __ Smaxv(h19, v1.V8H());
18140  __ Smaxv(s20, v2.V4S());
18141
18142  END();
18143
18144  RUN();
18145
18146  ASSERT_EQUAL_128(0x0, 0x33, q16);
18147  ASSERT_EQUAL_128(0x0, 0x44, q17);
18148  ASSERT_EQUAL_128(0x0, 0x55ff, q18);
18149  ASSERT_EQUAL_128(0x0, 0x55ff, q19);
18150  ASSERT_EQUAL_128(0x0, 0x66555555, q20);
18151  TEARDOWN();
18152}
18153
18154
18155TEST(neon_acrosslanes_sminv) {
18156  SETUP();
18157
18158  START();
18159
18160  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
18161  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
18162  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18163
18164  __ Sminv(b16, v0.V8B());
18165  __ Sminv(b17, v0.V16B());
18166  __ Sminv(h18, v1.V4H());
18167  __ Sminv(h19, v1.V8H());
18168  __ Sminv(s20, v2.V4S());
18169
18170  END();
18171
18172  RUN();
18173
18174  ASSERT_EQUAL_128(0x0, 0xaa, q16);
18175  ASSERT_EQUAL_128(0x0, 0x80, q17);
18176  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
18177  ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
18178  ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
18179  TEARDOWN();
18180}
18181
18182TEST(neon_acrosslanes_umaxv) {
18183  SETUP();
18184
18185  START();
18186
18187  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
18188  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
18189  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18190
18191  __ Umaxv(b16, v0.V8B());
18192  __ Umaxv(b17, v0.V16B());
18193  __ Umaxv(h18, v1.V4H());
18194  __ Umaxv(h19, v1.V8H());
18195  __ Umaxv(s20, v2.V4S());
18196
18197  END();
18198
18199  RUN();
18200
18201  ASSERT_EQUAL_128(0x0, 0xfc, q16);
18202  ASSERT_EQUAL_128(0x0, 0xfe, q17);
18203  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
18204  ASSERT_EQUAL_128(0x0, 0xffab, q19);
18205  ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
18206  TEARDOWN();
18207}
18208
18209
18210TEST(neon_acrosslanes_uminv) {
18211  SETUP();
18212
18213  START();
18214
18215  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
18216  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
18217  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
18218
18219  __ Uminv(b16, v0.V8B());
18220  __ Uminv(b17, v0.V16B());
18221  __ Uminv(h18, v1.V4H());
18222  __ Uminv(h19, v1.V8H());
18223  __ Uminv(s20, v2.V4S());
18224
18225  END();
18226
18227  RUN();
18228
18229  ASSERT_EQUAL_128(0x0, 0x01, q16);
18230  ASSERT_EQUAL_128(0x0, 0x00, q17);
18231  ASSERT_EQUAL_128(0x0, 0x0001, q18);
18232  ASSERT_EQUAL_128(0x0, 0x0000, q19);
18233  ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
18234  TEARDOWN();
18235}
18236
18237
18238TEST(neon_3same_smin) {
18239  SETUP();
18240
18241  START();
18242
18243  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
18244  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18245
18246  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
18247  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
18248  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());
18249
18250  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
18251  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
18252  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
18253  END();
18254
18255  RUN();
18256
18257  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
18258  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
18259  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
18260  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
18261  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
18262  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
18263  TEARDOWN();
18264}
18265
18266
18267TEST(neon_3same_umax) {
18268  SETUP();
18269
18270  START();
18271
18272  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
18273  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18274
18275  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
18276  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
18277  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());
18278
18279  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
18280  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
18281  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
18282  END();
18283
18284  RUN();
18285
18286  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
18287  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
18288  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
18289  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
18290  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
18291  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
18292  TEARDOWN();
18293}
18294
18295
18296TEST(neon_3same_umin) {
18297  SETUP();
18298
18299  START();
18300
18301  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
18302  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
18303
18304  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
18305  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
18306  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());
18307
18308  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
18309  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
18310  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
18311  END();
18312
18313  RUN();
18314
18315  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
18316  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
18317  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
18318  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
18319  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
18320  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
18321  TEARDOWN();
18322}
18323
18324
18325TEST(neon_2regmisc_mvn) {
18326  SETUP();
18327
18328  START();
18329
18330  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
18331
18332  __ Mvn(v16.V16B(), v0.V16B());
18333  __ Mvn(v17.V8H(), v0.V8H());
18334  __ Mvn(v18.V4S(), v0.V4S());
18335  __ Mvn(v19.V2D(), v0.V2D());
18336
18337  __ Mvn(v24.V8B(), v0.V8B());
18338  __ Mvn(v25.V4H(), v0.V4H());
18339  __ Mvn(v26.V2S(), v0.V2S());
18340
18341  END();
18342
18343  RUN();
18344
18345  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
18346  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
18347  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
18348  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);
18349
18350  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
18351  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
18352  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
18353  TEARDOWN();
18354}
18355
18356
18357TEST(neon_2regmisc_not) {
18358  SETUP();
18359
18360  START();
18361
18362  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
18363  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);
18364
18365  __ Not(v16.V16B(), v0.V16B());
18366  __ Not(v17.V8B(), v1.V8B());
18367  END();
18368
18369  RUN();
18370
18371  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
18372  ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
18373  TEARDOWN();
18374}
18375
18376TEST(neon_2regmisc_cls_clz_cnt) {
18377  SETUP();
18378
18379  START();
18380
18381  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
18382  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
18383
18384  __ Cls(v16.V8B() , v1.V8B());
18385  __ Cls(v17.V16B(), v1.V16B());
18386  __ Cls(v18.V4H() , v1.V4H());
18387  __ Cls(v19.V8H() , v1.V8H());
18388  __ Cls(v20.V2S() , v1.V2S());
18389  __ Cls(v21.V4S() , v1.V4S());
18390
18391  __ Clz(v22.V8B() , v0.V8B());
18392  __ Clz(v23.V16B(), v0.V16B());
18393  __ Clz(v24.V4H() , v0.V4H());
18394  __ Clz(v25.V8H() , v0.V8H());
18395  __ Clz(v26.V2S() , v0.V2S());
18396  __ Clz(v27.V4S() , v0.V4S());
18397
18398  __ Cnt(v28.V8B() , v0.V8B());
18399  __ Cnt(v29.V16B(), v1.V16B());
18400
18401  END();
18402
18403  RUN();
18404
18405  ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
18406  ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
18407  ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
18408  ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
18409  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
18410  ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);
18411
18412  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
18413  ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
18414  ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
18415  ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
18416  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
18417  ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);
18418
18419  ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
18420  ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);
18421
18422  TEARDOWN();
18423}
18424
18425TEST(neon_2regmisc_rev) {
18426  SETUP();
18427
18428  START();
18429
18430  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
18431  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
18432
18433  __ Rev16(v16.V8B() , v0.V8B());
18434  __ Rev16(v17.V16B(), v0.V16B());
18435
18436  __ Rev32(v18.V8B() , v0.V8B());
18437  __ Rev32(v19.V16B(), v0.V16B());
18438  __ Rev32(v20.V4H() , v0.V4H());
18439  __ Rev32(v21.V8H() , v0.V8H());
18440
18441  __ Rev64(v22.V8B() , v0.V8B());
18442  __ Rev64(v23.V16B(), v0.V16B());
18443  __ Rev64(v24.V4H() , v0.V4H());
18444  __ Rev64(v25.V8H() , v0.V8H());
18445  __ Rev64(v26.V2S() , v0.V2S());
18446  __ Rev64(v27.V4S() , v0.V4S());
18447
18448  __ Rbit(v28.V8B() , v1.V8B());
18449  __ Rbit(v29.V16B(), v1.V16B());
18450
18451  END();
18452
18453  RUN();
18454
18455  ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
18456  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);
18457
18458  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
18459  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
18460  ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
18461  ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);
18462
18463  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
18464  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
18465  ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
18466  ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
18467  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
18468  ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);
18469
18470  ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
18471  ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);
18472
18473  TEARDOWN();
18474}
18475
18476
18477TEST(neon_sli) {
18478  SETUP();
18479
18480  START();
18481
18482  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
18483  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
18484
18485  __ Mov(v16.V2D(),  v0.V2D());
18486  __ Mov(v17.V2D(),  v0.V2D());
18487  __ Mov(v18.V2D(),  v0.V2D());
18488  __ Mov(v19.V2D(),  v0.V2D());
18489  __ Mov(v20.V2D(),  v0.V2D());
18490  __ Mov(v21.V2D(),  v0.V2D());
18491  __ Mov(v22.V2D(),  v0.V2D());
18492  __ Mov(v23.V2D(),  v0.V2D());
18493
18494  __ Sli(v16.V8B(),  v1.V8B(),  4);
18495  __ Sli(v17.V16B(), v1.V16B(), 7);
18496  __ Sli(v18.V4H(),  v1.V4H(),  8);
18497  __ Sli(v19.V8H(),  v1.V8H(), 15);
18498  __ Sli(v20.V2S(),  v1.V2S(),  0);
18499  __ Sli(v21.V4S(),  v1.V4S(), 31);
18500  __ Sli(v22.V2D(),  v1.V2D(), 48);
18501
18502  __ Sli(d23,  d1, 48);
18503
18504  END();
18505
18506  RUN();
18507
18508  ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
18509  ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
18510  ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
18511  ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
18512  ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
18513  ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
18514  ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);
18515
18516  ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);
18517
18518
18519  TEARDOWN();
18520}
18521
18522
18523TEST(neon_sri) {
18524  SETUP();
18525
18526  START();
18527
18528  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
18529  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
18530
18531  __ Mov(v16.V2D(),  v0.V2D());
18532  __ Mov(v17.V2D(),  v0.V2D());
18533  __ Mov(v18.V2D(),  v0.V2D());
18534  __ Mov(v19.V2D(),  v0.V2D());
18535  __ Mov(v20.V2D(),  v0.V2D());
18536  __ Mov(v21.V2D(),  v0.V2D());
18537  __ Mov(v22.V2D(),  v0.V2D());
18538  __ Mov(v23.V2D(),  v0.V2D());
18539
18540  __ Sri(v16.V8B(),  v1.V8B(),  4);
18541  __ Sri(v17.V16B(), v1.V16B(), 7);
18542  __ Sri(v18.V4H(),  v1.V4H(),  8);
18543  __ Sri(v19.V8H(),  v1.V8H(), 15);
18544  __ Sri(v20.V2S(),  v1.V2S(),  1);
18545  __ Sri(v21.V4S(),  v1.V4S(), 31);
18546  __ Sri(v22.V2D(),  v1.V2D(), 48);
18547
18548  __ Sri(d23,  d1, 48);
18549
18550  END();
18551
18552  RUN();
18553
18554  ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
18555  ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
18556  ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
18557  ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
18558  ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
18559  ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
18560  ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);
18561
18562  ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
18563
18564
18565  TEARDOWN();
18566}
18567
18568
18569TEST(neon_shrn) {
18570  SETUP();
18571
18572  START();
18573
18574  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18575  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18576  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18577  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18578  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18579
18580  __ Shrn(v16.V8B(),   v0.V8H(), 8);
18581  __ Shrn2(v16.V16B(), v1.V8H(), 1);
18582  __ Shrn(v17.V4H(),   v1.V4S(), 16);
18583  __ Shrn2(v17.V8H(),  v2.V4S(), 1);
18584  __ Shrn(v18.V2S(),   v3.V2D(), 32);
18585  __ Shrn2(v18.V4S(),  v3.V2D(), 1);
18586
18587  END();
18588
18589  RUN();
18590  ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
18591  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
18592  ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
18593  TEARDOWN();
18594}
18595
18596
18597TEST(neon_rshrn) {
18598  SETUP();
18599
18600  START();
18601
18602  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18603  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18604  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18605  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18606  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18607
18608  __ Rshrn(v16.V8B(),   v0.V8H(), 8);
18609  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
18610  __ Rshrn(v17.V4H(),   v1.V4S(), 16);
18611  __ Rshrn2(v17.V8H(),  v2.V4S(), 1);
18612  __ Rshrn(v18.V2S(),   v3.V2D(), 32);
18613  __ Rshrn2(v18.V4S(),  v3.V2D(), 1);
18614
18615  END();
18616
18617  RUN();
18618  ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
18619  ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
18620  ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
18621  TEARDOWN();
18622}
18623
18624
18625TEST(neon_uqshrn) {
18626  SETUP();
18627
18628  START();
18629
18630  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18631  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18632  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18633  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18634  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18635
18636  __ Uqshrn(v16.V8B(),   v0.V8H(), 8);
18637  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
18638  __ Uqshrn(v17.V4H(),   v1.V4S(), 16);
18639  __ Uqshrn2(v17.V8H(),  v2.V4S(), 1);
18640  __ Uqshrn(v18.V2S(),   v3.V2D(), 32);
18641  __ Uqshrn2(v18.V4S(),  v3.V2D(), 1);
18642
18643  __ Uqshrn(b19, h0, 8);
18644  __ Uqshrn(h20, s1, 16);
18645  __ Uqshrn(s21, d3, 32);
18646
18647  END();
18648
18649  RUN();
18650  ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
18651  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
18652  ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
18653  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
18654  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
18655  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
18656  TEARDOWN();
18657}
18658
18659
18660TEST(neon_uqrshrn) {
18661  SETUP();
18662
18663  START();
18664
18665  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18666  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18667  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18668  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18669  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18670
18671  __ Uqrshrn(v16.V8B(),   v0.V8H(), 8);
18672  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
18673  __ Uqrshrn(v17.V4H(),   v1.V4S(), 16);
18674  __ Uqrshrn2(v17.V8H(),  v2.V4S(), 1);
18675  __ Uqrshrn(v18.V2S(),   v3.V2D(), 32);
18676  __ Uqrshrn2(v18.V4S(),  v3.V2D(), 1);
18677
18678  __ Uqrshrn(b19, h0, 8);
18679  __ Uqrshrn(h20, s1, 16);
18680  __ Uqrshrn(s21, d3, 32);
18681
18682  END();
18683
18684  RUN();
18685  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
18686  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
18687  ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
18688  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
18689  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
18690  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
18691  TEARDOWN();
18692}
18693
18694
18695TEST(neon_sqshrn) {
18696  SETUP();
18697
18698  START();
18699
18700  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18701  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18702  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18703  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18704  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18705
18706  __ Sqshrn(v16.V8B(),   v0.V8H(), 8);
18707  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
18708  __ Sqshrn(v17.V4H(),   v1.V4S(), 16);
18709  __ Sqshrn2(v17.V8H(),  v2.V4S(), 1);
18710  __ Sqshrn(v18.V2S(),   v3.V2D(), 32);
18711  __ Sqshrn2(v18.V4S(),  v3.V2D(), 1);
18712
18713  __ Sqshrn(b19, h0, 8);
18714  __ Sqshrn(h20, s1, 16);
18715  __ Sqshrn(s21, d3, 32);
18716
18717  END();
18718
18719  RUN();
18720  ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
18721  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
18722  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
18723  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
18724  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
18725  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
18726  TEARDOWN();
18727}
18728
18729
18730TEST(neon_sqrshrn) {
18731  SETUP();
18732
18733  START();
18734
18735  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18736  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18737  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18738  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18739  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18740
18741  __ Sqrshrn(v16.V8B(),   v0.V8H(), 8);
18742  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
18743  __ Sqrshrn(v17.V4H(),   v1.V4S(), 16);
18744  __ Sqrshrn2(v17.V8H(),  v2.V4S(), 1);
18745  __ Sqrshrn(v18.V2S(),   v3.V2D(), 32);
18746  __ Sqrshrn2(v18.V4S(),  v3.V2D(), 1);
18747
18748  __ Sqrshrn(b19, h0, 8);
18749  __ Sqrshrn(h20, s1, 16);
18750  __ Sqrshrn(s21, d3, 32);
18751
18752  END();
18753
18754  RUN();
18755  ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
18756  ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
18757  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
18758  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
18759  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
18760  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
18761  TEARDOWN();
18762}
18763
18764
18765TEST(neon_sqshrun) {
18766  SETUP();
18767
18768  START();
18769
18770  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18771  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18772  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18773  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18774  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18775
18776  __ Sqshrun(v16.V8B(),   v0.V8H(), 8);
18777  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
18778  __ Sqshrun(v17.V4H(),   v1.V4S(), 16);
18779  __ Sqshrun2(v17.V8H(),  v2.V4S(), 1);
18780  __ Sqshrun(v18.V2S(),   v3.V2D(), 32);
18781  __ Sqshrun2(v18.V4S(),  v3.V2D(), 1);
18782
18783  __ Sqshrun(b19, h0, 8);
18784  __ Sqshrun(h20, s1, 16);
18785  __ Sqshrun(s21, d3, 32);
18786
18787  END();
18788
18789  RUN();
18790  ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
18791  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
18792  ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
18793  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
18794  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
18795  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
18796  TEARDOWN();
18797}
18798
18799
18800TEST(neon_sqrshrun) {
18801  SETUP();
18802
18803  START();
18804
18805  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
18806  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
18807  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
18808  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
18809  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
18810
18811  __ Sqrshrun(v16.V8B(),   v0.V8H(), 8);
18812  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
18813  __ Sqrshrun(v17.V4H(),   v1.V4S(), 16);
18814  __ Sqrshrun2(v17.V8H(),  v2.V4S(), 1);
18815  __ Sqrshrun(v18.V2S(),   v3.V2D(), 32);
18816  __ Sqrshrun2(v18.V4S(),  v3.V2D(), 1);
18817
18818  __ Sqrshrun(b19, h0, 8);
18819  __ Sqrshrun(h20, s1, 16);
18820  __ Sqrshrun(s21, d3, 32);
18821
18822  END();
18823
18824  RUN();
18825  ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
18826  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
18827  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
18828  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
18829  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
18830  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
18831  TEARDOWN();
18832}
18833
18834TEST(neon_modimm_bic) {
18835  SETUP();
18836
18837  START();
18838
18839  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18840  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18841  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18842  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18843  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18844  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18845  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18846  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18847  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18848  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18849  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18850  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
18851
18852  __ Bic(v16.V4H(), 0x00, 0);
18853  __ Bic(v17.V4H(), 0xff, 8);
18854  __ Bic(v18.V8H(), 0x00, 0);
18855  __ Bic(v19.V8H(), 0xff, 8);
18856
18857  __ Bic(v20.V2S(), 0x00, 0);
18858  __ Bic(v21.V2S(), 0xff, 8);
18859  __ Bic(v22.V2S(), 0x00, 16);
18860  __ Bic(v23.V2S(), 0xff, 24);
18861
18862  __ Bic(v24.V4S(), 0xff, 0);
18863  __ Bic(v25.V4S(), 0x00, 8);
18864  __ Bic(v26.V4S(), 0xff, 16);
18865  __ Bic(v27.V4S(), 0x00, 24);
18866
18867  END();
18868
18869  RUN();
18870
18871  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
18872  ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
18873  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
18874  ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);
18875
18876  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
18877  ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
18878  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
18879  ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);
18880
18881  ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
18882  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
18883  ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
18884  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
18885
18886  TEARDOWN();
18887}
18888
18889
18890TEST(neon_modimm_movi_16bit_any) {
18891  SETUP();
18892
18893  START();
18894
18895  __ Movi(v0.V4H(), 0xabab);
18896  __ Movi(v1.V4H(), 0xab00);
18897  __ Movi(v2.V4H(), 0xabff);
18898  __ Movi(v3.V8H(), 0x00ab);
18899  __ Movi(v4.V8H(), 0xffab);
18900  __ Movi(v5.V8H(), 0xabcd);
18901
18902  END();
18903
18904  RUN();
18905
18906  ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
18907  ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
18908  ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
18909  ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
18910  ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
18911  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
18912
18913  TEARDOWN();
18914}
18915
18916
18917TEST(neon_modimm_movi_32bit_any) {
18918  SETUP();
18919
18920  START();
18921
18922  __ Movi(v0.V2S(), 0x000000ab);
18923  __ Movi(v1.V2S(), 0x0000ab00);
18924  __ Movi(v2.V4S(), 0x00ab0000);
18925  __ Movi(v3.V4S(), 0xab000000);
18926
18927  __ Movi(v4.V2S(), 0xffffffab);
18928  __ Movi(v5.V2S(), 0xffffabff);
18929  __ Movi(v6.V4S(), 0xffabffff);
18930  __ Movi(v7.V4S(), 0xabffffff);
18931
18932  __ Movi(v16.V2S(), 0x0000abff);
18933  __ Movi(v17.V2S(), 0x00abffff);
18934  __ Movi(v18.V4S(), 0xffab0000);
18935  __ Movi(v19.V4S(), 0xffffab00);
18936
18937  __ Movi(v20.V4S(), 0xabababab);
18938  __ Movi(v21.V4S(), 0xabcdabcd);
18939  __ Movi(v22.V4S(), 0xabcdef01);
18940  __ Movi(v23.V4S(), 0x00ffff00);
18941
18942  END();
18943
18944  RUN();
18945
18946  ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
18947  ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
18948  ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
18949  ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);
18950
18951  ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
18952  ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
18953  ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
18954  ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);
18955
18956  ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
18957  ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
18958  ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
18959  ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);
18960
18961  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
18962  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
18963  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
18964  ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
18965  TEARDOWN();
18966}
18967
18968
18969TEST(neon_modimm_movi_64bit_any) {
18970  SETUP();
18971
18972  START();
18973
18974  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
18975  __ Movi(v1.V2D(), 0xabababababababab);
18976  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
18977  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
18978  __ Movi(v4.V1D(), 0xabcdef0123456789);
18979  __ Movi(v5.V2D(), 0xabcdef0123456789);
18980
18981  END();
18982
18983  RUN();
18984
18985  ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q0);
18986  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
18987  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
18988  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
18989  ASSERT_EQUAL_128(0x0, 0xabcdef0123456789, q4);
18990  ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);
18991
18992  TEARDOWN();
18993}
18994
18995
18996TEST(neon_modimm_movi) {
18997  SETUP();
18998
18999  START();
19000
19001  __ Movi(v0.V8B(),  0xaa);
19002  __ Movi(v1.V16B(), 0x55);
19003
19004  __ Movi(d2,       0x00ffff0000ffffff);
19005  __ Movi(v3.V2D(), 0x00ffff0000ffffff);
19006
19007  __ Movi(v16.V4H(), 0x00, LSL, 0);
19008  __ Movi(v17.V4H(), 0xff, LSL, 8);
19009  __ Movi(v18.V8H(), 0x00, LSL, 0);
19010  __ Movi(v19.V8H(), 0xff, LSL, 8);
19011
19012  __ Movi(v20.V2S(), 0x00, LSL, 0);
19013  __ Movi(v21.V2S(), 0xff, LSL, 8);
19014  __ Movi(v22.V2S(), 0x00, LSL, 16);
19015  __ Movi(v23.V2S(), 0xff, LSL, 24);
19016
19017  __ Movi(v24.V4S(), 0xff, LSL, 0);
19018  __ Movi(v25.V4S(), 0x00, LSL, 8);
19019  __ Movi(v26.V4S(), 0xff, LSL, 16);
19020  __ Movi(v27.V4S(), 0x00, LSL, 24);
19021
19022  __ Movi(v28.V2S(), 0xaa, MSL, 8);
19023  __ Movi(v29.V2S(), 0x55, MSL, 16);
19024  __ Movi(v30.V4S(), 0xff, MSL, 8);
19025  __ Movi(v31.V4S(), 0x00, MSL, 16);
19026
19027  END();
19028
19029  RUN();
19030
19031  ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
19032  ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);
19033
19034  ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
19035  ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);
19036
19037  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
19038  ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
19039  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
19040  ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);
19041
19042  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
19043  ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
19044  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
19045  ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);
19046
19047  ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
19048  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
19049  ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
19050  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
19051
19052  ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
19053  ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
19054  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
19055  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
19056
19057  TEARDOWN();
19058}
19059
19060
19061TEST(neon_modimm_mvni) {
19062  SETUP();
19063
19064  START();
19065
19066  __ Mvni(v16.V4H(), 0x00, LSL, 0);
19067  __ Mvni(v17.V4H(), 0xff, LSL, 8);
19068  __ Mvni(v18.V8H(), 0x00, LSL, 0);
19069  __ Mvni(v19.V8H(), 0xff, LSL, 8);
19070
19071  __ Mvni(v20.V2S(), 0x00, LSL, 0);
19072  __ Mvni(v21.V2S(), 0xff, LSL, 8);
19073  __ Mvni(v22.V2S(), 0x00, LSL, 16);
19074  __ Mvni(v23.V2S(), 0xff, LSL, 24);
19075
19076  __ Mvni(v24.V4S(), 0xff, LSL, 0);
19077  __ Mvni(v25.V4S(), 0x00, LSL, 8);
19078  __ Mvni(v26.V4S(), 0xff, LSL, 16);
19079  __ Mvni(v27.V4S(), 0x00, LSL, 24);
19080
19081  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
19082  __ Mvni(v29.V2S(), 0x55, MSL, 16);
19083  __ Mvni(v30.V4S(), 0xff, MSL, 8);
19084  __ Mvni(v31.V4S(), 0x00, MSL, 16);
19085
19086  END();
19087
19088  RUN();
19089
19090  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
19091  ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
19092  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
19093  ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);
19094
19095  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
19096  ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
19097  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
19098  ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);
19099
19100  ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
19101  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
19102  ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
19103  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);
19104
19105  ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
19106  ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
19107  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
19108  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
19109
19110  TEARDOWN();
19111}
19112
19113
19114TEST(neon_modimm_orr) {
19115  SETUP();
19116
19117  START();
19118
19119  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19120  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19121  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19122  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19123  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19124  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19125  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19126  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19127  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19128  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19129  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19130  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
19131
19132  __ Orr(v16.V4H(), 0x00, 0);
19133  __ Orr(v17.V4H(), 0xff, 8);
19134  __ Orr(v18.V8H(), 0x00, 0);
19135  __ Orr(v19.V8H(), 0xff, 8);
19136
19137  __ Orr(v20.V2S(), 0x00, 0);
19138  __ Orr(v21.V2S(), 0xff, 8);
19139  __ Orr(v22.V2S(), 0x00, 16);
19140  __ Orr(v23.V2S(), 0xff, 24);
19141
19142  __ Orr(v24.V4S(), 0xff, 0);
19143  __ Orr(v25.V4S(), 0x00, 8);
19144  __ Orr(v26.V4S(), 0xff, 16);
19145  __ Orr(v27.V4S(), 0x00, 24);
19146
19147  END();
19148
19149  RUN();
19150
19151  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
19152  ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
19153  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
19154  ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);
19155
19156  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
19157  ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
19158  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
19159  ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);
19160
19161  ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
19162  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
19163  ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
19164  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
19165
19166  TEARDOWN();
19167}
19168
19169
19170// TODO: add arbitrary values once load literal to Q registers is supported.
19171TEST(neon_modimm_fmov) {
19172  SETUP();
19173
19174  // Immediates which can be encoded in the instructions.
19175  const float kOne = 1.0f;
19176  const float kPointFive = 0.5f;
19177  const double kMinusThirteen = -13.0;
19178  // Immediates which cannot be encoded in the instructions.
19179  const float kNonImmFP32 = 255.0f;
19180  const double kNonImmFP64 = 12.3456;
19181
19182  START();
19183  __ Fmov(v11.V2S(), kOne);
19184  __ Fmov(v12.V4S(), kPointFive);
19185  __ Fmov(v22.V2D(), kMinusThirteen);
19186  __ Fmov(v13.V2S(), kNonImmFP32);
19187  __ Fmov(v14.V4S(), kNonImmFP32);
19188  __ Fmov(v23.V2D(), kNonImmFP64);
19189  __ Fmov(v1.V2S(), 0.0);
19190  __ Fmov(v2.V4S(), 0.0);
19191  __ Fmov(v3.V2D(), 0.0);
19192  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
19193  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
19194  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
19195  END();
19196
19197  RUN();
19198
19199  const uint64_t kOne1S = float_to_rawbits(1.0);
19200  const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
19201  const uint64_t kPointFive1S = float_to_rawbits(0.5);
19202  const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
19203  const uint64_t kMinusThirteen1D = double_to_rawbits(-13.0);
19204  const uint64_t kNonImmFP321S = float_to_rawbits(kNonImmFP32);
19205  const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
19206  const uint64_t kNonImmFP641D = double_to_rawbits(kNonImmFP64);
19207  const uint64_t kFP32Inf1S = float_to_rawbits(kFP32PositiveInfinity);
19208  const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
19209  const uint64_t kFP64Inf1D = double_to_rawbits(kFP64PositiveInfinity);
19210
19211  ASSERT_EQUAL_128(0x0, kOne2S, q11);
19212  ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
19213  ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
19214  ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
19215  ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
19216  ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
19217  ASSERT_EQUAL_128(0x0, 0x0, q1);
19218  ASSERT_EQUAL_128(0x0, 0x0, q2);
19219  ASSERT_EQUAL_128(0x0, 0x0, q3);
19220  ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
19221  ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
19222  ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);
19223
19224  TEARDOWN();
19225}
19226
19227
19228TEST(neon_perm) {
19229  SETUP();
19230
19231  START();
19232
19233  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
19234  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
19235
19236  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
19237  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
19238  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
19239  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
19240  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
19241  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());
19242
19243  END();
19244
19245  RUN();
19246
19247  ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
19248  ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
19249  ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
19250  ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
19251  ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
19252  ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
19253
19254  TEARDOWN();
19255}
19256
19257
19258TEST(neon_copy_dup_element) {
19259  SETUP();
19260
19261  START();
19262
19263  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19264  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
19265  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
19266  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
19267  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
19268  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);
19269
19270  __ Dup(v16.V16B(), v0.B(), 0);
19271  __ Dup(v17.V8H(),  v1.H(), 7);
19272  __ Dup(v18.V4S(),  v1.S(), 3);
19273  __ Dup(v19.V2D(),  v0.D(), 0);
19274
19275  __ Dup(v20.V8B(), v0.B(), 0);
19276  __ Dup(v21.V4H(), v1.H(), 7);
19277  __ Dup(v22.V2S(), v1.S(), 3);
19278
19279  __ Dup(v23.B(), v0.B(), 0);
19280  __ Dup(v24.H(), v1.H(), 7);
19281  __ Dup(v25.S(), v1.S(), 3);
19282  __ Dup(v26.D(), v0.D(), 0);
19283
19284  __ Dup(v2.V16B(), v2.B(), 0);
19285  __ Dup(v3.V8H(),  v3.H(), 7);
19286  __ Dup(v4.V4S(),  v4.S(), 0);
19287  __ Dup(v5.V2D(),  v5.D(), 1);
19288
19289  END();
19290
19291  RUN();
19292
19293  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
19294  ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
19295  ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
19296  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);
19297
19298  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
19299  ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
19300  ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);
19301
19302  ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
19303  ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
19304  ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
19305  ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);
19306
19307  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
19308  ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
19309  ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
19310  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
19311  TEARDOWN();
19312}
19313
19314
19315TEST(neon_copy_dup_general) {
19316  SETUP();
19317
19318  START();
19319
19320  __ Mov(x0, 0x0011223344556677);
19321
19322  __ Dup(v16.V16B(), w0);
19323  __ Dup(v17.V8H(),  w0);
19324  __ Dup(v18.V4S(),  w0);
19325  __ Dup(v19.V2D(),  x0);
19326
19327  __ Dup(v20.V8B(), w0);
19328  __ Dup(v21.V4H(), w0);
19329  __ Dup(v22.V2S(), w0);
19330
19331  __ Dup(v2.V16B(), wzr);
19332  __ Dup(v3.V8H(),  wzr);
19333  __ Dup(v4.V4S(),  wzr);
19334  __ Dup(v5.V2D(),  xzr);
19335
19336  END();
19337
19338  RUN();
19339
19340  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
19341  ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
19342  ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
19343  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);
19344
19345  ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
19346  ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
19347  ASSERT_EQUAL_128(0, 0x4455667744556677, q22);
19348
19349  ASSERT_EQUAL_128(0, 0, q2);
19350  ASSERT_EQUAL_128(0, 0, q3);
19351  ASSERT_EQUAL_128(0, 0, q4);
19352  ASSERT_EQUAL_128(0, 0, q5);
19353  TEARDOWN();
19354}
19355
19356
19357TEST(neon_copy_ins_element) {
19358  SETUP();
19359
19360  START();
19361
19362  __ Movi(v0.V2D(),  0x0011223344556677, 0x8899aabbccddeeff);
19363  __ Movi(v1.V2D(),  0xffeddccbbaae9988, 0x7766554433221100);
19364  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
19365  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
19366  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19367  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19368
19369  __ Movi(v2.V2D(), 0, 0x0011223344556677);
19370  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
19371  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
19372  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
19373
19374  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
19375  __ Ins(v17.V8H(),  0,  v1.V8H(), 7);
19376  __ Ins(v18.V4S(),  3,  v1.V4S(), 0);
19377  __ Ins(v19.V2D(),  1,  v0.V2D(), 0);
19378
19379  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
19380  __ Ins(v3.V8H(),  0,  v3.V8H(), 7);
19381  __ Ins(v4.V4S(),  3,  v4.V4S(), 0);
19382  __ Ins(v5.V2D(),  0,  v5.V2D(), 1);
19383
19384  END();
19385
19386  RUN();
19387
19388  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
19389  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
19390  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
19391  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);
19392
19393  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
19394  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
19395  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
19396  ASSERT_EQUAL_128(0, 0, q5);
19397  TEARDOWN();
19398}
19399
19400
19401TEST(neon_copy_mov_element) {
19402  SETUP();
19403
19404  START();
19405
19406  __ Movi(v0.V2D(),  0x0011223344556677, 0x8899aabbccddeeff);
19407  __ Movi(v1.V2D(),  0xffeddccbbaae9988, 0x7766554433221100);
19408  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
19409  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
19410  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19411  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19412
19413  __ Movi(v2.V2D(), 0, 0x0011223344556677);
19414  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
19415  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
19416  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
19417
19418  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
19419  __ Mov(v17.V8H(),  0,  v1.V8H(), 7);
19420  __ Mov(v18.V4S(),  3,  v1.V4S(), 0);
19421  __ Mov(v19.V2D(),  1,  v0.V2D(), 0);
19422
19423  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
19424  __ Mov(v3.V8H(),  0,  v3.V8H(), 7);
19425  __ Mov(v4.V4S(),  3,  v4.V4S(), 0);
19426  __ Mov(v5.V2D(),  0,  v5.V2D(), 1);
19427
19428  END();
19429
19430  RUN();
19431
19432  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
19433  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
19434  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
19435  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);
19436
19437  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
19438  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
19439  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
19440  ASSERT_EQUAL_128(0, 0, q5);
19441  TEARDOWN();
19442}
19443
19444
19445TEST(neon_copy_smov) {
19446  SETUP();
19447
19448  START();
19449
19450  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
19451
19452  __ Smov(w0, v0.B(),  7);
19453  __ Smov(w1, v0.B(), 15);
19454
19455  __ Smov(w2, v0.H(),  0);
19456  __ Smov(w3, v0.H(),  3);
19457
19458  __ Smov(x4, v0.B(),  7);
19459  __ Smov(x5, v0.B(),  15);
19460
19461  __ Smov(x6, v0.H(),  0);
19462  __ Smov(x7, v0.H(),  3);
19463
19464  __ Smov(x16, v0.S(),  0);
19465  __ Smov(x17, v0.S(),  1);
19466
19467  END();
19468
19469  RUN();
19470
19471  ASSERT_EQUAL_32(0xfffffffe, w0);
19472  ASSERT_EQUAL_32(0x00000001, w1);
19473  ASSERT_EQUAL_32(0x00003210, w2);
19474  ASSERT_EQUAL_32(0xfffffedc, w3);
19475  ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
19476  ASSERT_EQUAL_64(0x0000000000000001, x5);
19477  ASSERT_EQUAL_64(0x0000000000003210, x6);
19478  ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
19479  ASSERT_EQUAL_64(0x0000000076543210, x16);
19480  ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
19481
19482  TEARDOWN();
19483}
19484
19485
19486TEST(neon_copy_umov_mov) {
19487  SETUP();
19488
19489  START();
19490
19491  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
19492
19493  __ Umov(w0, v0.B(), 15);
19494  __ Umov(w1, v0.H(),  0);
19495  __ Umov(w2, v0.S(),  3);
19496  __ Umov(x3, v0.D(),  1);
19497
19498  __ Mov(w4, v0.S(),  3);
19499  __ Mov(x5, v0.D(),  1);
19500
19501  END();
19502
19503  RUN();
19504
19505  ASSERT_EQUAL_32(0x00000001, w0);
19506  ASSERT_EQUAL_32(0x00003210, w1);
19507  ASSERT_EQUAL_32(0x01234567, w2);
19508  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
19509  ASSERT_EQUAL_32(0x01234567, w4);
19510  ASSERT_EQUAL_64(0x0123456789abcdef, x5);
19511
19512  TEARDOWN();
19513}
19514
19515
19516TEST(neon_copy_ins_general) {
19517  SETUP();
19518
19519  START();
19520
19521  __ Mov(x0, 0x0011223344556677);
19522  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
19523  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
19524  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19525  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19526
19527  __ Movi(v2.V2D(), 0, 0x0011223344556677);
19528  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
19529  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
19530  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
19531
19532  __ Ins(v16.V16B(), 15, w0);
19533  __ Ins(v17.V8H(),  0,  w0);
19534  __ Ins(v18.V4S(),  3,  w0);
19535  __ Ins(v19.V2D(),  0,  x0);
19536
19537  __ Ins(v2.V16B(), 2, w0);
19538  __ Ins(v3.V8H(),  0, w0);
19539  __ Ins(v4.V4S(),  3, w0);
19540  __ Ins(v5.V2D(),  1, x0);
19541
19542  END();
19543
19544  RUN();
19545
19546  ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
19547  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
19548  ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
19549  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);
19550
19551  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
19552  ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
19553  ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
19554  ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
19555  TEARDOWN();
19556}
19557
19558
19559TEST(neon_extract_ext) {
19560  SETUP();
19561
19562  START();
19563
19564  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
19565  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
19566
19567  __ Movi(v2.V2D(), 0, 0x0011223344556677);
19568  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
19569
19570  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
19571  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
19572  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is same as one Src
19573  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All reg are the same
19574
19575  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
19576  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
19577  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);     // Dest is same as one Src
19578  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);     // All reg are the same
19579
19580  END();
19581
19582  RUN();
19583
19584  ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
19585  ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
19586  ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
19587  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);
19588
19589  ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
19590  ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
19591  ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
19592  ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
19593  TEARDOWN();
19594}
19595
19596
19597TEST(neon_3different_uaddl) {
19598  SETUP();
19599
19600  START();
19601
19602  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
19603  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
19604  __ Movi(v2.V2D(), 0, 0x0101010101010101);
19605
19606  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
19607  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
19608  __ Movi(v5.V2D(), 0, 0x0000000180008001);
19609  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
19610  __ Movi(v7.V2D(), 0, 0x0001000100010001);
19611
19612  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
19613  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
19614  __ Movi(v18.V2D(), 0, 0x0000000000000001);
19615  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
19616  __ Movi(v20.V2D(), 0, 0x0000000100000001);
19617
19618  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());
19619
19620  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
19621  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());
19622
19623  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
19624  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());
19625
19626
19627  END();
19628
19629  RUN();
19630
19631  ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
19632  ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
19633  ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
19634  ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
19635  ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
19636  TEARDOWN();
19637}
19638
19639
19640TEST(neon_3different_addhn_subhn) {
19641  SETUP();
19642
19643  START();
19644
19645  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
19646  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
19647  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
19648  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
19649  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
19650
19651  __ Addhn(v16.V8B(),   v0.V8H(), v1.V8H());
19652  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
19653  __ Raddhn(v17.V8B(),   v0.V8H(), v1.V8H());
19654  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
19655  __ Subhn(v18.V8B(),   v0.V8H(), v1.V8H());
19656  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
19657  __ Rsubhn(v19.V8B(),   v0.V8H(), v1.V8H());
19658  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());
19659
19660  END();
19661
19662  RUN();
19663
19664  ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
19665  ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
19666  ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
19667  ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
19668  TEARDOWN();
19669}
19670
19671TEST(neon_d_only_scalar) {
19672  SETUP();
19673
19674  START();
19675
19676  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
19677  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
19678  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
19679  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
19680  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);
19681
19682  __ Add(d16, d0, d0);
19683  __ Add(d17, d1, d1);
19684  __ Add(d18, d2, d2);
19685  __ Sub(d19, d0, d0);
19686  __ Sub(d20, d0, d1);
19687  __ Sub(d21, d1, d0);
19688  __ Ushl(d22, d0, d3);
19689  __ Ushl(d23, d0, d4);
19690  __ Sshl(d24, d0, d3);
19691  __ Sshl(d25, d0, d4);
19692  __ Ushr(d26, d0, 1);
19693  __ Sshr(d27, d0, 3);
19694  __ Shl(d28, d0, 0);
19695  __ Shl(d29, d0, 16);
19696
19697  END();
19698
19699  RUN();
19700
19701  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
19702  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
19703  ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
19704  ASSERT_EQUAL_128(0, 0, q19);
19705  ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
19706  ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
19707  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
19708  ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
19709  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
19710  ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
19711  ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
19712  ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
19713  ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
19714  ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
19715
19716  TEARDOWN();
19717}
19718
19719
19720TEST(neon_sqshl_imm_scalar) {
19721  SETUP();
19722
19723  START();
19724
19725  __ Movi(v0.V2D(), 0x0, 0x7f);
19726  __ Movi(v1.V2D(), 0x0, 0x80);
19727  __ Movi(v2.V2D(), 0x0, 0x01);
19728  __ Sqshl(b16, b0, 1);
19729  __ Sqshl(b17, b1, 1);
19730  __ Sqshl(b18, b2, 1);
19731
19732  __ Movi(v0.V2D(), 0x0, 0x7fff);
19733  __ Movi(v1.V2D(), 0x0, 0x8000);
19734  __ Movi(v2.V2D(), 0x0, 0x0001);
19735  __ Sqshl(h19, h0, 1);
19736  __ Sqshl(h20, h1, 1);
19737  __ Sqshl(h21, h2, 1);
19738
19739  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
19740  __ Movi(v1.V2D(), 0x0, 0x80000000);
19741  __ Movi(v2.V2D(), 0x0, 0x00000001);
19742  __ Sqshl(s22, s0, 1);
19743  __ Sqshl(s23, s1, 1);
19744  __ Sqshl(s24, s2, 1);
19745
19746  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
19747  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
19748  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
19749  __ Sqshl(d25, d0, 1);
19750  __ Sqshl(d26, d1, 1);
19751  __ Sqshl(d27, d2, 1);
19752
19753  END();
19754
19755  RUN();
19756
19757  ASSERT_EQUAL_128(0, 0x7f, q16);
19758  ASSERT_EQUAL_128(0, 0x80, q17);
19759  ASSERT_EQUAL_128(0, 0x02, q18);
19760
19761  ASSERT_EQUAL_128(0, 0x7fff, q19);
19762  ASSERT_EQUAL_128(0, 0x8000, q20);
19763  ASSERT_EQUAL_128(0, 0x0002, q21);
19764
19765  ASSERT_EQUAL_128(0, 0x7fffffff, q22);
19766  ASSERT_EQUAL_128(0, 0x80000000, q23);
19767  ASSERT_EQUAL_128(0, 0x00000002, q24);
19768
19769  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
19770  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
19771  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
19772
19773  TEARDOWN();
19774}
19775
19776
19777TEST(neon_uqshl_imm_scalar) {
19778  SETUP();
19779
19780  START();
19781
19782  __ Movi(v0.V2D(), 0x0, 0x7f);
19783  __ Movi(v1.V2D(), 0x0, 0x80);
19784  __ Movi(v2.V2D(), 0x0, 0x01);
19785  __ Uqshl(b16, b0, 1);
19786  __ Uqshl(b17, b1, 1);
19787  __ Uqshl(b18, b2, 1);
19788
19789  __ Movi(v0.V2D(), 0x0, 0x7fff);
19790  __ Movi(v1.V2D(), 0x0, 0x8000);
19791  __ Movi(v2.V2D(), 0x0, 0x0001);
19792  __ Uqshl(h19, h0, 1);
19793  __ Uqshl(h20, h1, 1);
19794  __ Uqshl(h21, h2, 1);
19795
19796  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
19797  __ Movi(v1.V2D(), 0x0, 0x80000000);
19798  __ Movi(v2.V2D(), 0x0, 0x00000001);
19799  __ Uqshl(s22, s0, 1);
19800  __ Uqshl(s23, s1, 1);
19801  __ Uqshl(s24, s2, 1);
19802
19803  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
19804  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
19805  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
19806  __ Uqshl(d25, d0, 1);
19807  __ Uqshl(d26, d1, 1);
19808  __ Uqshl(d27, d2, 1);
19809
19810  END();
19811
19812  RUN();
19813
19814  ASSERT_EQUAL_128(0, 0xfe, q16);
19815  ASSERT_EQUAL_128(0, 0xff, q17);
19816  ASSERT_EQUAL_128(0, 0x02, q18);
19817
19818  ASSERT_EQUAL_128(0, 0xfffe, q19);
19819  ASSERT_EQUAL_128(0, 0xffff, q20);
19820  ASSERT_EQUAL_128(0, 0x0002, q21);
19821
19822  ASSERT_EQUAL_128(0, 0xfffffffe, q22);
19823  ASSERT_EQUAL_128(0, 0xffffffff, q23);
19824  ASSERT_EQUAL_128(0, 0x00000002, q24);
19825
19826  ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
19827  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
19828  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
19829
19830  TEARDOWN();
19831}
19832
19833
19834TEST(neon_sqshlu_scalar) {
19835  SETUP();
19836
19837  START();
19838
19839  __ Movi(v0.V2D(), 0x0, 0x7f);
19840  __ Movi(v1.V2D(), 0x0, 0x80);
19841  __ Movi(v2.V2D(), 0x0, 0x01);
19842  __ Sqshlu(b16, b0, 2);
19843  __ Sqshlu(b17, b1, 2);
19844  __ Sqshlu(b18, b2, 2);
19845
19846  __ Movi(v0.V2D(), 0x0, 0x7fff);
19847  __ Movi(v1.V2D(), 0x0, 0x8000);
19848  __ Movi(v2.V2D(), 0x0, 0x0001);
19849  __ Sqshlu(h19, h0, 2);
19850  __ Sqshlu(h20, h1, 2);
19851  __ Sqshlu(h21, h2, 2);
19852
19853  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
19854  __ Movi(v1.V2D(), 0x0, 0x80000000);
19855  __ Movi(v2.V2D(), 0x0, 0x00000001);
19856  __ Sqshlu(s22, s0, 2);
19857  __ Sqshlu(s23, s1, 2);
19858  __ Sqshlu(s24, s2, 2);
19859
19860  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
19861  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
19862  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
19863  __ Sqshlu(d25, d0, 2);
19864  __ Sqshlu(d26, d1, 2);
19865  __ Sqshlu(d27, d2, 2);
19866
19867  END();
19868
19869  RUN();
19870
19871  ASSERT_EQUAL_128(0, 0xff, q16);
19872  ASSERT_EQUAL_128(0, 0x00, q17);
19873  ASSERT_EQUAL_128(0, 0x04, q18);
19874
19875  ASSERT_EQUAL_128(0, 0xffff, q19);
19876  ASSERT_EQUAL_128(0, 0x0000, q20);
19877  ASSERT_EQUAL_128(0, 0x0004, q21);
19878
19879  ASSERT_EQUAL_128(0, 0xffffffff, q22);
19880  ASSERT_EQUAL_128(0, 0x00000000, q23);
19881  ASSERT_EQUAL_128(0, 0x00000004, q24);
19882
19883  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
19884  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
19885  ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
19886
19887  TEARDOWN();
19888}
19889
19890
19891TEST(neon_sshll) {
19892  SETUP();
19893
19894  START();
19895
19896  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
19897  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
19898  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
19899
19900  __ Sshll(v16.V8H(), v0.V8B(),  4);
19901  __ Sshll2(v17.V8H(), v0.V16B(), 4);
19902
19903  __ Sshll(v18.V4S(),  v1.V4H(), 8);
19904  __ Sshll2(v19.V4S(),  v1.V8H(), 8);
19905
19906  __ Sshll(v20.V2D(),  v2.V2S(), 16);
19907  __ Sshll2(v21.V2D(),  v2.V4S(), 16);
19908
19909  END();
19910
19911  RUN();
19912
19913  ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
19914  ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
19915  ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
19916  ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
19917  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
19918  ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
19919  TEARDOWN();
19920}
19921
19922TEST(neon_shll) {
19923  SETUP();
19924
19925  START();
19926
19927  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
19928  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
19929  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
19930
19931  __ Shll(v16.V8H(), v0.V8B(),  8);
19932  __ Shll2(v17.V8H(), v0.V16B(), 8);
19933
19934  __ Shll(v18.V4S(),  v1.V4H(), 16);
19935  __ Shll2(v19.V4S(),  v1.V8H(), 16);
19936
19937  __ Shll(v20.V2D(),  v2.V2S(), 32);
19938  __ Shll2(v21.V2D(),  v2.V4S(), 32);
19939
19940  END();
19941
19942  RUN();
19943
19944  ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
19945  ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
19946  ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
19947  ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
19948  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
19949  ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
19950  TEARDOWN();
19951}
19952
19953TEST(neon_ushll) {
19954  SETUP();
19955
19956  START();
19957
19958  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
19959  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
19960  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
19961
19962  __ Ushll(v16.V8H(), v0.V8B(),  4);
19963  __ Ushll2(v17.V8H(), v0.V16B(), 4);
19964
19965  __ Ushll(v18.V4S(),  v1.V4H(), 8);
19966  __ Ushll2(v19.V4S(),  v1.V8H(), 8);
19967
19968  __ Ushll(v20.V2D(),  v2.V2S(), 16);
19969  __ Ushll2(v21.V2D(),  v2.V4S(), 16);
19970
19971  END();
19972
19973  RUN();
19974
19975  ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
19976  ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
19977  ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
19978  ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
19979  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
19980  ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
19981  TEARDOWN();
19982}
19983
19984
19985TEST(neon_sxtl) {
19986  SETUP();
19987
19988  START();
19989
19990  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
19991  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
19992  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
19993
19994  __ Sxtl(v16.V8H(), v0.V8B());
19995  __ Sxtl2(v17.V8H(), v0.V16B());
19996
19997  __ Sxtl(v18.V4S(),  v1.V4H());
19998  __ Sxtl2(v19.V4S(),  v1.V8H());
19999
20000  __ Sxtl(v20.V2D(),  v2.V2S());
20001  __ Sxtl2(v21.V2D(),  v2.V4S());
20002
20003  END();
20004
20005  RUN();
20006
20007  ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
20008  ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
20009  ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
20010  ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
20011  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
20012  ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
20013  TEARDOWN();
20014}
20015
20016
20017TEST(neon_uxtl) {
20018  SETUP();
20019
20020  START();
20021
20022  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
20023  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
20024  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
20025
20026  __ Uxtl(v16.V8H(), v0.V8B());
20027  __ Uxtl2(v17.V8H(), v0.V16B());
20028
20029  __ Uxtl(v18.V4S(),  v1.V4H());
20030  __ Uxtl2(v19.V4S(),  v1.V8H());
20031
20032  __ Uxtl(v20.V2D(),  v2.V2S());
20033  __ Uxtl2(v21.V2D(),  v2.V4S());
20034
20035  END();
20036
20037  RUN();
20038
20039  ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
20040  ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
20041  ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
20042  ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
20043  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
20044  ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
20045  TEARDOWN();
20046}
20047
20048
20049TEST(neon_ssra) {
20050  SETUP();
20051
20052  START();
20053
20054  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
20055  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
20056  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
20057  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
20058  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
20059
20060  __ Mov(v16.V2D(),   v0.V2D());
20061  __ Mov(v17.V2D(),   v0.V2D());
20062  __ Mov(v18.V2D(),   v1.V2D());
20063  __ Mov(v19.V2D(),   v1.V2D());
20064  __ Mov(v20.V2D(),   v2.V2D());
20065  __ Mov(v21.V2D(),   v2.V2D());
20066  __ Mov(v22.V2D(),   v3.V2D());
20067  __ Mov(v23.V2D(),   v4.V2D());
20068  __ Mov(v24.V2D(),   v3.V2D());
20069  __ Mov(v25.V2D(),   v4.V2D());
20070
20071  __ Ssra(v16.V8B(),  v0.V8B(),  4);
20072  __ Ssra(v17.V16B(), v0.V16B(), 4);
20073
20074  __ Ssra(v18.V4H(),  v1.V4H(), 8);
20075  __ Ssra(v19.V8H(),  v1.V8H(), 8);
20076
20077  __ Ssra(v20.V2S(),  v2.V2S(), 16);
20078  __ Ssra(v21.V4S(),  v2.V4S(), 16);
20079
20080  __ Ssra(v22.V2D(),  v3.V2D(), 32);
20081  __ Ssra(v23.V2D(),  v4.V2D(), 32);
20082
20083  __ Ssra(d24,  d3, 48);
20084
20085  END();
20086
20087  RUN();
20088
20089  ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
20090  ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
20091  ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
20092  ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
20093  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
20094  ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
20095  ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
20096  ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
20097  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
20098  TEARDOWN();
20099}
20100
20101TEST(neon_srsra) {
20102  SETUP();
20103
20104  START();
20105
20106  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
20107  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
20108  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
20109  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
20110  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
20111
20112  __ Mov(v16.V2D(),   v0.V2D());
20113  __ Mov(v17.V2D(),   v0.V2D());
20114  __ Mov(v18.V2D(),   v1.V2D());
20115  __ Mov(v19.V2D(),   v1.V2D());
20116  __ Mov(v20.V2D(),   v2.V2D());
20117  __ Mov(v21.V2D(),   v2.V2D());
20118  __ Mov(v22.V2D(),   v3.V2D());
20119  __ Mov(v23.V2D(),   v4.V2D());
20120  __ Mov(v24.V2D(),   v3.V2D());
20121  __ Mov(v25.V2D(),   v4.V2D());
20122
20123  __ Srsra(v16.V8B(),  v0.V8B(),  4);
20124  __ Srsra(v17.V16B(), v0.V16B(), 4);
20125
20126  __ Srsra(v18.V4H(),  v1.V4H(), 8);
20127  __ Srsra(v19.V8H(),  v1.V8H(), 8);
20128
20129  __ Srsra(v20.V2S(),  v2.V2S(), 16);
20130  __ Srsra(v21.V4S(),  v2.V4S(), 16);
20131
20132  __ Srsra(v22.V2D(),  v3.V2D(), 32);
20133  __ Srsra(v23.V2D(),  v4.V2D(), 32);
20134
20135  __ Srsra(d24,  d3, 48);
20136
20137  END();
20138
20139  RUN();
20140
20141  ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
20142  ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
20143  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
20144  ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
20145  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
20146  ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
20147  ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
20148  ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
20149  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
20150
20151  TEARDOWN();
20152}
20153
20154TEST(neon_usra) {
20155  SETUP();
20156
20157  START();
20158
20159  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
20160  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
20161  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
20162  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
20163  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
20164
20165  __ Mov(v16.V2D(),   v0.V2D());
20166  __ Mov(v17.V2D(),   v0.V2D());
20167  __ Mov(v18.V2D(),   v1.V2D());
20168  __ Mov(v19.V2D(),   v1.V2D());
20169  __ Mov(v20.V2D(),   v2.V2D());
20170  __ Mov(v21.V2D(),   v2.V2D());
20171  __ Mov(v22.V2D(),   v3.V2D());
20172  __ Mov(v23.V2D(),   v4.V2D());
20173  __ Mov(v24.V2D(),   v3.V2D());
20174  __ Mov(v25.V2D(),   v4.V2D());
20175
20176  __ Usra(v16.V8B(),  v0.V8B(),  4);
20177  __ Usra(v17.V16B(), v0.V16B(), 4);
20178
20179  __ Usra(v18.V4H(),  v1.V4H(), 8);
20180  __ Usra(v19.V8H(),  v1.V8H(), 8);
20181
20182  __ Usra(v20.V2S(),  v2.V2S(), 16);
20183  __ Usra(v21.V4S(),  v2.V4S(), 16);
20184
20185  __ Usra(v22.V2D(),  v3.V2D(), 32);
20186  __ Usra(v23.V2D(),  v4.V2D(), 32);
20187
20188  __ Usra(d24,  d3, 48);
20189
20190  END();
20191
20192  RUN();
20193
20194  ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
20195  ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
20196  ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
20197  ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
20198  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
20199  ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
20200  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
20201  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
20202  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
20203
20204  TEARDOWN();
20205}
20206
20207TEST(neon_ursra) {
20208  SETUP();
20209
20210  START();
20211
20212  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
20213  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
20214  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
20215  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
20216  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
20217
20218  __ Mov(v16.V2D(),   v0.V2D());
20219  __ Mov(v17.V2D(),   v0.V2D());
20220  __ Mov(v18.V2D(),   v1.V2D());
20221  __ Mov(v19.V2D(),   v1.V2D());
20222  __ Mov(v20.V2D(),   v2.V2D());
20223  __ Mov(v21.V2D(),   v2.V2D());
20224  __ Mov(v22.V2D(),   v3.V2D());
20225  __ Mov(v23.V2D(),   v4.V2D());
20226  __ Mov(v24.V2D(),   v3.V2D());
20227  __ Mov(v25.V2D(),   v4.V2D());
20228
20229  __ Ursra(v16.V8B(),  v0.V8B(),  4);
20230  __ Ursra(v17.V16B(), v0.V16B(), 4);
20231
20232  __ Ursra(v18.V4H(),  v1.V4H(), 8);
20233  __ Ursra(v19.V8H(),  v1.V8H(), 8);
20234
20235  __ Ursra(v20.V2S(),  v2.V2S(), 16);
20236  __ Ursra(v21.V4S(),  v2.V4S(), 16);
20237
20238  __ Ursra(v22.V2D(),  v3.V2D(), 32);
20239  __ Ursra(v23.V2D(),  v4.V2D(), 32);
20240
20241  __ Ursra(d24,  d3, 48);
20242
20243  END();
20244
20245  RUN();
20246
20247  ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
20248  ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
20249  ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
20250  ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
20251  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
20252  ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
20253  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
20254  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
20255  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
20256  TEARDOWN();
20257}
20258
20259
20260TEST(neon_uqshl_scalar) {
20261  SETUP();
20262
20263  START();
20264
20265  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
20266  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
20267  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
20268  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
20269
20270  __ Uqshl(b16, b0, b2);
20271  __ Uqshl(b17, b0, b3);
20272  __ Uqshl(b18, b1, b2);
20273  __ Uqshl(b19, b1, b3);
20274  __ Uqshl(h20, h0, h2);
20275  __ Uqshl(h21, h0, h3);
20276  __ Uqshl(h22, h1, h2);
20277  __ Uqshl(h23, h1, h3);
20278  __ Uqshl(s24, s0, s2);
20279  __ Uqshl(s25, s0, s3);
20280  __ Uqshl(s26, s1, s2);
20281  __ Uqshl(s27, s1, s3);
20282  __ Uqshl(d28, d0, d2);
20283  __ Uqshl(d29, d0, d3);
20284  __ Uqshl(d30, d1, d2);
20285  __ Uqshl(d31, d1, d3);
20286
20287  END();
20288
20289  RUN();
20290
20291  ASSERT_EQUAL_128(0, 0xff, q16);
20292  ASSERT_EQUAL_128(0, 0x78, q17);
20293  ASSERT_EQUAL_128(0, 0xfe, q18);
20294  ASSERT_EQUAL_128(0, 0x3f, q19);
20295  ASSERT_EQUAL_128(0, 0xffff, q20);
20296  ASSERT_EQUAL_128(0, 0x7878, q21);
20297  ASSERT_EQUAL_128(0, 0xfefe, q22);
20298  ASSERT_EQUAL_128(0, 0x3fbf, q23);
20299  ASSERT_EQUAL_128(0, 0xffffffff, q24);
20300  ASSERT_EQUAL_128(0, 0x78007878, q25);
20301  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
20302  ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
20303  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
20304  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
20305  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
20306  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
20307
20308  TEARDOWN();
20309}
20310
20311
20312TEST(neon_sqshl_scalar) {
20313  SETUP();
20314
20315  START();
20316
20317  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
20318  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
20319  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
20320  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
20321
20322  __ Sqshl(b16, b0, b2);
20323  __ Sqshl(b17, b0, b3);
20324  __ Sqshl(b18, b1, b2);
20325  __ Sqshl(b19, b1, b3);
20326  __ Sqshl(h20, h0, h2);
20327  __ Sqshl(h21, h0, h3);
20328  __ Sqshl(h22, h1, h2);
20329  __ Sqshl(h23, h1, h3);
20330  __ Sqshl(s24, s0, s2);
20331  __ Sqshl(s25, s0, s3);
20332  __ Sqshl(s26, s1, s2);
20333  __ Sqshl(s27, s1, s3);
20334  __ Sqshl(d28, d0, d2);
20335  __ Sqshl(d29, d0, d3);
20336  __ Sqshl(d30, d1, d2);
20337  __ Sqshl(d31, d1, d3);
20338
20339  END();
20340
20341  RUN();
20342
20343  ASSERT_EQUAL_128(0, 0x80, q16);
20344  ASSERT_EQUAL_128(0, 0xdf, q17);
20345  ASSERT_EQUAL_128(0, 0x7f, q18);
20346  ASSERT_EQUAL_128(0, 0x20, q19);
20347  ASSERT_EQUAL_128(0, 0x8000, q20);
20348  ASSERT_EQUAL_128(0, 0xdfdf, q21);
20349  ASSERT_EQUAL_128(0, 0x7fff, q22);
20350  ASSERT_EQUAL_128(0, 0x2020, q23);
20351  ASSERT_EQUAL_128(0, 0x80000000, q24);
20352  ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
20353  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
20354  ASSERT_EQUAL_128(0, 0x20002020, q27);
20355  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
20356  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
20357  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
20358  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
20359
20360  TEARDOWN();
20361}
20362
20363
20364TEST(neon_urshl_scalar) {
20365  SETUP();
20366
20367  START();
20368
20369  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
20370  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
20371  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
20372  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
20373
20374  __ Urshl(d28, d0, d2);
20375  __ Urshl(d29, d0, d3);
20376  __ Urshl(d30, d1, d2);
20377  __ Urshl(d31, d1, d3);
20378
20379  END();
20380
20381  RUN();
20382
20383  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
20384  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
20385  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
20386  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
20387
20388  TEARDOWN();
20389}
20390
20391
20392TEST(neon_srshl_scalar) {
20393  SETUP();
20394
20395  START();
20396
20397  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
20398  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
20399  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
20400  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
20401
20402  __ Srshl(d28, d0, d2);
20403  __ Srshl(d29, d0, d3);
20404  __ Srshl(d30, d1, d2);
20405  __ Srshl(d31, d1, d3);
20406
20407  END();
20408
20409  RUN();
20410
20411  ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
20412  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
20413  ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
20414  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
20415
20416  TEARDOWN();
20417}
20418
20419
20420TEST(neon_uqrshl_scalar) {
20421  SETUP();
20422
20423  START();
20424
20425  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
20426  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
20427  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
20428  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
20429
20430  __ Uqrshl(b16, b0, b2);
20431  __ Uqrshl(b17, b0, b3);
20432  __ Uqrshl(b18, b1, b2);
20433  __ Uqrshl(b19, b1, b3);
20434  __ Uqrshl(h20, h0, h2);
20435  __ Uqrshl(h21, h0, h3);
20436  __ Uqrshl(h22, h1, h2);
20437  __ Uqrshl(h23, h1, h3);
20438  __ Uqrshl(s24, s0, s2);
20439  __ Uqrshl(s25, s0, s3);
20440  __ Uqrshl(s26, s1, s2);
20441  __ Uqrshl(s27, s1, s3);
20442  __ Uqrshl(d28, d0, d2);
20443  __ Uqrshl(d29, d0, d3);
20444  __ Uqrshl(d30, d1, d2);
20445  __ Uqrshl(d31, d1, d3);
20446
20447  END();
20448
20449  RUN();
20450
20451  ASSERT_EQUAL_128(0, 0xff, q16);
20452  ASSERT_EQUAL_128(0, 0x78, q17);
20453  ASSERT_EQUAL_128(0, 0xfe, q18);
20454  ASSERT_EQUAL_128(0, 0x40, q19);
20455  ASSERT_EQUAL_128(0, 0xffff, q20);
20456  ASSERT_EQUAL_128(0, 0x7878, q21);
20457  ASSERT_EQUAL_128(0, 0xfefe, q22);
20458  ASSERT_EQUAL_128(0, 0x3fc0, q23);
20459  ASSERT_EQUAL_128(0, 0xffffffff, q24);
20460  ASSERT_EQUAL_128(0, 0x78007878, q25);
20461  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
20462  ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
20463  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
20464  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
20465  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
20466  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
20467
20468  TEARDOWN();
20469}
20470
20471
20472TEST(neon_sqrshl_scalar) {
20473  SETUP();
20474
20475  START();
20476
20477  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
20478  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
20479  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
20480  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
20481
20482  __ Sqrshl(b16, b0, b2);
20483  __ Sqrshl(b17, b0, b3);
20484  __ Sqrshl(b18, b1, b2);
20485  __ Sqrshl(b19, b1, b3);
20486  __ Sqrshl(h20, h0, h2);
20487  __ Sqrshl(h21, h0, h3);
20488  __ Sqrshl(h22, h1, h2);
20489  __ Sqrshl(h23, h1, h3);
20490  __ Sqrshl(s24, s0, s2);
20491  __ Sqrshl(s25, s0, s3);
20492  __ Sqrshl(s26, s1, s2);
20493  __ Sqrshl(s27, s1, s3);
20494  __ Sqrshl(d28, d0, d2);
20495  __ Sqrshl(d29, d0, d3);
20496  __ Sqrshl(d30, d1, d2);
20497  __ Sqrshl(d31, d1, d3);
20498
20499  END();
20500
20501  RUN();
20502
20503  ASSERT_EQUAL_128(0, 0x80, q16);
20504  ASSERT_EQUAL_128(0, 0xe0, q17);
20505  ASSERT_EQUAL_128(0, 0x7f, q18);
20506  ASSERT_EQUAL_128(0, 0x20, q19);
20507  ASSERT_EQUAL_128(0, 0x8000, q20);
20508  ASSERT_EQUAL_128(0, 0xdfe0, q21);
20509  ASSERT_EQUAL_128(0, 0x7fff, q22);
20510  ASSERT_EQUAL_128(0, 0x2020, q23);
20511  ASSERT_EQUAL_128(0, 0x80000000, q24);
20512  ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
20513  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
20514  ASSERT_EQUAL_128(0, 0x20002020, q27);
20515  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
20516  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
20517  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
20518  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
20519
20520  TEARDOWN();
20521}
20522
20523
20524TEST(neon_uqadd_scalar) {
20525  SETUP();
20526
20527  START();
20528
20529  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
20530  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
20531  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
20532
20533  __ Uqadd(b16, b0, b0);
20534  __ Uqadd(b17, b1, b1);
20535  __ Uqadd(b18, b2, b2);
20536  __ Uqadd(h19, h0, h0);
20537  __ Uqadd(h20, h1, h1);
20538  __ Uqadd(h21, h2, h2);
20539  __ Uqadd(s22, s0, s0);
20540  __ Uqadd(s23, s1, s1);
20541  __ Uqadd(s24, s2, s2);
20542  __ Uqadd(d25, d0, d0);
20543  __ Uqadd(d26, d1, d1);
20544  __ Uqadd(d27, d2, d2);
20545
20546  END();
20547
20548  RUN();
20549
20550  ASSERT_EQUAL_128(0, 0xff, q16);
20551  ASSERT_EQUAL_128(0, 0xfe, q17);
20552  ASSERT_EQUAL_128(0, 0x20, q18);
20553  ASSERT_EQUAL_128(0, 0xffff, q19);
20554  ASSERT_EQUAL_128(0, 0xfefe, q20);
20555  ASSERT_EQUAL_128(0, 0x2020, q21);
20556  ASSERT_EQUAL_128(0, 0xffffffff, q22);
20557  ASSERT_EQUAL_128(0, 0xfffefefe, q23);
20558  ASSERT_EQUAL_128(0, 0x20002020, q24);
20559  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
20560  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
20561  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
20562
20563  TEARDOWN();
20564}
20565
20566
20567TEST(neon_sqadd_scalar) {
20568  SETUP();
20569
20570  START();
20571
20572  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
20573  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
20574  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
20575
20576  __ Sqadd(b16, b0, b0);
20577  __ Sqadd(b17, b1, b1);
20578  __ Sqadd(b18, b2, b2);
20579  __ Sqadd(h19, h0, h0);
20580  __ Sqadd(h20, h1, h1);
20581  __ Sqadd(h21, h2, h2);
20582  __ Sqadd(s22, s0, s0);
20583  __ Sqadd(s23, s1, s1);
20584  __ Sqadd(s24, s2, s2);
20585  __ Sqadd(d25, d0, d0);
20586  __ Sqadd(d26, d1, d1);
20587  __ Sqadd(d27, d2, d2);
20588
20589  END();
20590
20591  RUN();
20592
20593  ASSERT_EQUAL_128(0, 0x80, q16);
20594  ASSERT_EQUAL_128(0, 0x7f, q17);
20595  ASSERT_EQUAL_128(0, 0x20, q18);
20596  ASSERT_EQUAL_128(0, 0x8000, q19);
20597  ASSERT_EQUAL_128(0, 0x7fff, q20);
20598  ASSERT_EQUAL_128(0, 0x2020, q21);
20599  ASSERT_EQUAL_128(0, 0x80000000, q22);
20600  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
20601  ASSERT_EQUAL_128(0, 0x20002020, q24);
20602  ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
20603  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
20604  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
20605
20606  TEARDOWN();
20607}
20608
20609
20610TEST(neon_uqsub_scalar) {
20611  SETUP();
20612
20613  START();
20614
20615  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
20616  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
20617
20618  __ Uqsub(b16, b0, b0);
20619  __ Uqsub(b17, b0, b1);
20620  __ Uqsub(b18, b1, b0);
20621  __ Uqsub(h19, h0, h0);
20622  __ Uqsub(h20, h0, h1);
20623  __ Uqsub(h21, h1, h0);
20624  __ Uqsub(s22, s0, s0);
20625  __ Uqsub(s23, s0, s1);
20626  __ Uqsub(s24, s1, s0);
20627  __ Uqsub(d25, d0, d0);
20628  __ Uqsub(d26, d0, d1);
20629  __ Uqsub(d27, d1, d0);
20630
20631  END();
20632
20633  RUN();
20634
20635  ASSERT_EQUAL_128(0, 0, q16);
20636  ASSERT_EQUAL_128(0, 0x71, q17);
20637  ASSERT_EQUAL_128(0, 0, q18);
20638
20639  ASSERT_EQUAL_128(0, 0, q19);
20640  ASSERT_EQUAL_128(0, 0x7171, q20);
20641  ASSERT_EQUAL_128(0, 0, q21);
20642
20643  ASSERT_EQUAL_128(0, 0, q22);
20644  ASSERT_EQUAL_128(0, 0x70017171, q23);
20645  ASSERT_EQUAL_128(0, 0, q24);
20646
20647  ASSERT_EQUAL_128(0, 0, q25);
20648  ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
20649  ASSERT_EQUAL_128(0, 0, q27);
20650
20651  TEARDOWN();
20652}
20653
20654
20655TEST(neon_sqsub_scalar) {
20656  SETUP();
20657
20658  START();
20659
20660  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
20661  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);
20662
20663  __ Sqsub(b16, b0, b0);
20664  __ Sqsub(b17, b0, b1);
20665  __ Sqsub(b18, b1, b0);
20666  __ Sqsub(h19, h0, h0);
20667  __ Sqsub(h20, h0, h1);
20668  __ Sqsub(h21, h1, h0);
20669  __ Sqsub(s22, s0, s0);
20670  __ Sqsub(s23, s0, s1);
20671  __ Sqsub(s24, s1, s0);
20672  __ Sqsub(d25, d0, d0);
20673  __ Sqsub(d26, d0, d1);
20674  __ Sqsub(d27, d1, d0);
20675
20676  END();
20677
20678  RUN();
20679
20680  ASSERT_EQUAL_128(0, 0, q16);
20681  ASSERT_EQUAL_128(0, 0x80, q17);
20682  ASSERT_EQUAL_128(0, 0x7f, q18);
20683
20684  ASSERT_EQUAL_128(0, 0, q19);
20685  ASSERT_EQUAL_128(0, 0x8000, q20);
20686  ASSERT_EQUAL_128(0, 0x7fff, q21);
20687
20688  ASSERT_EQUAL_128(0, 0, q22);
20689  ASSERT_EQUAL_128(0, 0x80000000, q23);
20690  ASSERT_EQUAL_128(0, 0x7fffffff, q24);
20691
20692  ASSERT_EQUAL_128(0, 0, q25);
20693  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
20694  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
20695
20696  TEARDOWN();
20697}
20698
20699
20700TEST(neon_fmla_fmls) {
20701  SETUP();
20702
20703  START();
20704  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
20705  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
20706  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
20707  __ Mov(v16.V16B(), v0.V16B());
20708  __ Mov(v17.V16B(), v0.V16B());
20709  __ Mov(v18.V16B(), v0.V16B());
20710  __ Mov(v19.V16B(), v0.V16B());
20711  __ Mov(v20.V16B(), v0.V16B());
20712  __ Mov(v21.V16B(), v0.V16B());
20713
20714  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
20715  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
20716  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
20717  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
20718  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
20719  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
20720  END();
20721
20722  RUN();
20723
20724  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
20725  ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
20726  ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
20727  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
20728  ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
20729  ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
20730
20731  TEARDOWN();
20732}
20733
20734
20735TEST(neon_fmulx_scalar) {
20736  SETUP();
20737
20738  START();
20739  __ Fmov(s0, 2.0);
20740  __ Fmov(s1, 0.5);
20741  __ Fmov(s2, 0.0);
20742  __ Fmov(s3, -0.0);
20743  __ Fmov(s4, kFP32PositiveInfinity);
20744  __ Fmov(s5, kFP32NegativeInfinity);
20745  __ Fmulx(s16, s0, s1);
20746  __ Fmulx(s17, s2, s4);
20747  __ Fmulx(s18, s2, s5);
20748  __ Fmulx(s19, s3, s4);
20749  __ Fmulx(s20, s3, s5);
20750
20751  __ Fmov(d21, 2.0);
20752  __ Fmov(d22, 0.5);
20753  __ Fmov(d23, 0.0);
20754  __ Fmov(d24, -0.0);
20755  __ Fmov(d25, kFP64PositiveInfinity);
20756  __ Fmov(d26, kFP64NegativeInfinity);
20757  __ Fmulx(d27, d21, d22);
20758  __ Fmulx(d28, d23, d25);
20759  __ Fmulx(d29, d23, d26);
20760  __ Fmulx(d30, d24, d25);
20761  __ Fmulx(d31, d24, d26);
20762  END();
20763
20764  RUN();
20765
20766  ASSERT_EQUAL_FP32(1.0, s16);
20767  ASSERT_EQUAL_FP32(2.0, s17);
20768  ASSERT_EQUAL_FP32(-2.0, s18);
20769  ASSERT_EQUAL_FP32(-2.0, s19);
20770  ASSERT_EQUAL_FP32(2.0, s20);
20771  ASSERT_EQUAL_FP64(1.0, d27);
20772  ASSERT_EQUAL_FP64(2.0, d28);
20773  ASSERT_EQUAL_FP64(-2.0, d29);
20774  ASSERT_EQUAL_FP64(-2.0, d30);
20775  ASSERT_EQUAL_FP64(2.0, d31);
20776
20777  TEARDOWN();
20778}
20779
20780
20781TEST(crc32b) {
20782  SETUP();
20783  START();
20784
20785  __ Mov(w0, 0);
20786  __ Mov(w1, 0);
20787  __ Crc32b(w10, w0, w1);
20788
20789  __ Mov(w0, 0x1);
20790  __ Mov(w1, 0x138);
20791  __ Crc32b(w11, w0, w1);
20792
20793  __ Mov(w0, 0x1);
20794  __ Mov(w1, 0x38);
20795  __ Crc32b(w12, w0, w1);
20796
20797  __ Mov(w0, 0);
20798  __ Mov(w1, 128);
20799  __ Crc32b(w13, w0, w1);
20800
20801  __ Mov(w0, UINT32_MAX);
20802  __ Mov(w1, 255);
20803  __ Crc32b(w14, w0, w1);
20804
20805  __ Mov(w0, 0x00010001);
20806  __ Mov(w1, 0x10001000);
20807  __ Crc32b(w15, w0, w1);
20808
20809  END();
20810  RUN();
20811
20812  ASSERT_EQUAL_64(0x0,        x10);
20813  ASSERT_EQUAL_64(0x5f058808, x11);
20814  ASSERT_EQUAL_64(0x5f058808, x12);
20815  ASSERT_EQUAL_64(0xedb88320, x13);
20816  ASSERT_EQUAL_64(0x00ffffff, x14);
20817  ASSERT_EQUAL_64(0x77073196, x15);
20818
20819  TEARDOWN();
20820}
20821
20822
20823TEST(crc32h) {
20824  SETUP();
20825  START();
20826
20827  __ Mov(w0, 0);
20828  __ Mov(w1, 0);
20829  __ Crc32h(w10, w0, w1);
20830
20831  __ Mov(w0, 0x1);
20832  __ Mov(w1, 0x10038);
20833  __ Crc32h(w11, w0, w1);
20834
20835  __ Mov(w0, 0x1);
20836  __ Mov(w1, 0x38);
20837  __ Crc32h(w12, w0, w1);
20838
20839  __ Mov(w0, 0);
20840  __ Mov(w1, 128);
20841  __ Crc32h(w13, w0, w1);
20842
20843  __ Mov(w0, UINT32_MAX);
20844  __ Mov(w1, 255);
20845  __ Crc32h(w14, w0, w1);
20846
20847  __ Mov(w0, 0x00010001);
20848  __ Mov(w1, 0x10001000);
20849  __ Crc32h(w15, w0, w1);
20850
20851  END();
20852  RUN();
20853
20854  ASSERT_EQUAL_64(0x0,        x10);
20855  ASSERT_EQUAL_64(0x0e848dba, x11);
20856  ASSERT_EQUAL_64(0x0e848dba, x12);
20857  ASSERT_EQUAL_64(0x3b83984b, x13);
20858  ASSERT_EQUAL_64(0x2d021072, x14);
20859  ASSERT_EQUAL_64(0x04ac2124, x15);
20860
20861  TEARDOWN();
20862}
20863
20864
20865TEST(crc32w) {
20866  SETUP();
20867  START();
20868
20869  __ Mov(w0, 0);
20870  __ Mov(w1, 0);
20871  __ Crc32w(w10, w0, w1);
20872
20873  __ Mov(w0, 0x1);
20874  __ Mov(w1, 0x80000031);
20875  __ Crc32w(w11, w0, w1);
20876
20877  __ Mov(w0, 0);
20878  __ Mov(w1, 128);
20879  __ Crc32w(w13, w0, w1);
20880
20881  __ Mov(w0, UINT32_MAX);
20882  __ Mov(w1, 255);
20883  __ Crc32w(w14, w0, w1);
20884
20885  __ Mov(w0, 0x00010001);
20886  __ Mov(w1, 0x10001000);
20887  __ Crc32w(w15, w0, w1);
20888
20889  END();
20890  RUN();
20891
20892  ASSERT_EQUAL_64(0x0,        x10);
20893  ASSERT_EQUAL_64(0x1d937b81, x11);
20894  ASSERT_EQUAL_64(0xed59b63b, x13);
20895  ASSERT_EQUAL_64(0x00be2612, x14);
20896  ASSERT_EQUAL_64(0xa036e530, x15);
20897
20898  TEARDOWN();
20899}
20900
20901
20902TEST(crc32x) {
20903  SETUP();
20904  START();
20905
20906  __ Mov(w0, 0);
20907  __ Mov(x1, 0);
20908  __ Crc32x(w10, w0, x1);
20909
20910  __ Mov(w0, 0x1);
20911  __ Mov(x1, UINT64_C(0x0000000800000031));
20912  __ Crc32x(w11, w0, x1);
20913
20914  __ Mov(w0, 0);
20915  __ Mov(x1, 128);
20916  __ Crc32x(w13, w0, x1);
20917
20918  __ Mov(w0, UINT32_MAX);
20919  __ Mov(x1, 255);
20920  __ Crc32x(w14, w0, x1);
20921
20922  __ Mov(w0, 0x00010001);
20923  __ Mov(x1, UINT64_C(0x1000100000000000));
20924  __ Crc32x(w15, w0, x1);
20925
20926  END();
20927  RUN();
20928
20929  ASSERT_EQUAL_64(0x0,        x10);
20930  ASSERT_EQUAL_64(0x40797b92, x11);
20931  ASSERT_EQUAL_64(0x533b85da, x13);
20932  ASSERT_EQUAL_64(0xbc962670, x14);
20933  ASSERT_EQUAL_64(0x0667602f, x15);
20934
20935  TEARDOWN();
20936}
20937
20938
20939TEST(crc32cb) {
20940  SETUP();
20941  START();
20942
20943  __ Mov(w0, 0);
20944  __ Mov(w1, 0);
20945  __ Crc32cb(w10, w0, w1);
20946
20947  __ Mov(w0, 0x1);
20948  __ Mov(w1, 0x138);
20949  __ Crc32cb(w11, w0, w1);
20950
20951  __ Mov(w0, 0x1);
20952  __ Mov(w1, 0x38);
20953  __ Crc32cb(w12, w0, w1);
20954
20955  __ Mov(w0, 0);
20956  __ Mov(w1, 128);
20957  __ Crc32cb(w13, w0, w1);
20958
20959  __ Mov(w0, UINT32_MAX);
20960  __ Mov(w1, 255);
20961  __ Crc32cb(w14, w0, w1);
20962
20963  __ Mov(w0, 0x00010001);
20964  __ Mov(w1, 0x10001000);
20965  __ Crc32cb(w15, w0, w1);
20966
20967  END();
20968  RUN();
20969
20970  ASSERT_EQUAL_64(0x0,        x10);
20971  ASSERT_EQUAL_64(0x4851927d, x11);
20972  ASSERT_EQUAL_64(0x4851927d, x12);
20973  ASSERT_EQUAL_64(0x82f63b78, x13);
20974  ASSERT_EQUAL_64(0x00ffffff, x14);
20975  ASSERT_EQUAL_64(0xf26b8203, x15);
20976
20977  TEARDOWN();
20978}
20979
20980
20981TEST(crc32ch) {
20982  SETUP();
20983  START();
20984
20985  __ Mov(w0, 0);
20986  __ Mov(w1, 0);
20987  __ Crc32ch(w10, w0, w1);
20988
20989  __ Mov(w0, 0x1);
20990  __ Mov(w1, 0x10038);
20991  __ Crc32ch(w11, w0, w1);
20992
20993  __ Mov(w0, 0x1);
20994  __ Mov(w1, 0x38);
20995  __ Crc32ch(w12, w0, w1);
20996
20997  __ Mov(w0, 0);
20998  __ Mov(w1, 128);
20999  __ Crc32ch(w13, w0, w1);
21000
21001  __ Mov(w0, UINT32_MAX);
21002  __ Mov(w1, 255);
21003  __ Crc32ch(w14, w0, w1);
21004
21005  __ Mov(w0, 0x00010001);
21006  __ Mov(w1, 0x10001000);
21007  __ Crc32ch(w15, w0, w1);
21008
21009  END();
21010  RUN();
21011
21012  ASSERT_EQUAL_64(0x0,        x10);
21013  ASSERT_EQUAL_64(0xcef8494c, x11);
21014  ASSERT_EQUAL_64(0xcef8494c, x12);
21015  ASSERT_EQUAL_64(0xfbc3faf9, x13);
21016  ASSERT_EQUAL_64(0xad7dacae, x14);
21017  ASSERT_EQUAL_64(0x03fc5f19, x15);
21018
21019  TEARDOWN();
21020}
21021
21022
21023TEST(crc32cw) {
21024  SETUP();
21025  START();
21026
21027  __ Mov(w0, 0);
21028  __ Mov(w1, 0);
21029  __ Crc32cw(w10, w0, w1);
21030
21031  __ Mov(w0, 0x1);
21032  __ Mov(w1, 0x80000031);
21033  __ Crc32cw(w11, w0, w1);
21034
21035  __ Mov(w0, 0);
21036  __ Mov(w1, 128);
21037  __ Crc32cw(w13, w0, w1);
21038
21039  __ Mov(w0, UINT32_MAX);
21040  __ Mov(w1, 255);
21041  __ Crc32cw(w14, w0, w1);
21042
21043  __ Mov(w0, 0x00010001);
21044  __ Mov(w1, 0x10001000);
21045  __ Crc32cw(w15, w0, w1);
21046
21047  END();
21048  RUN();
21049
21050  ASSERT_EQUAL_64(0x0,        x10);
21051  ASSERT_EQUAL_64(0xbcb79ece, x11);
21052  ASSERT_EQUAL_64(0x52a0c93f, x13);
21053  ASSERT_EQUAL_64(0x9f9b5c7a, x14);
21054  ASSERT_EQUAL_64(0xae1b882a, x15);
21055
21056  TEARDOWN();
21057}
21058
21059
21060TEST(crc32cx) {
21061  SETUP();
21062  START();
21063
21064  __ Mov(w0, 0);
21065  __ Mov(x1, 0);
21066  __ Crc32cx(w10, w0, x1);
21067
21068  __ Mov(w0, 0x1);
21069  __ Mov(x1, UINT64_C(0x0000000800000031));
21070  __ Crc32cx(w11, w0, x1);
21071
21072  __ Mov(w0, 0);
21073  __ Mov(x1, 128);
21074  __ Crc32cx(w13, w0, x1);
21075
21076  __ Mov(w0, UINT32_MAX);
21077  __ Mov(x1, 255);
21078  __ Crc32cx(w14, w0, x1);
21079
21080  __ Mov(w0, 0x00010001);
21081  __ Mov(x1, UINT64_C(0x1000100000000000));
21082  __ Crc32cx(w15, w0, x1);
21083
21084  END();
21085  RUN();
21086
21087  ASSERT_EQUAL_64(0x0,        x10);
21088  ASSERT_EQUAL_64(0x7f320fcb, x11);
21089  ASSERT_EQUAL_64(0x34019664, x13);
21090  ASSERT_EQUAL_64(0x6cc27dd0, x14);
21091  ASSERT_EQUAL_64(0xc6f0acdb, x15);
21092
21093  TEARDOWN();
21094}
21095
21096
21097TEST(neon_fabd_scalar) {
21098  SETUP();
21099
21100  START();
21101  __ Fmov(s0, 2.0);
21102  __ Fmov(s1, 0.5);
21103  __ Fmov(s2, 0.0);
21104  __ Fmov(s3, -0.0);
21105  __ Fmov(s4, kFP32PositiveInfinity);
21106  __ Fmov(s5, kFP32NegativeInfinity);
21107  __ Fabd(s16, s1, s0);
21108  __ Fabd(s17, s2, s3);
21109  __ Fabd(s18, s2, s5);
21110  __ Fabd(s19, s3, s4);
21111  __ Fabd(s20, s3, s5);
21112
21113  __ Fmov(d21, 2.0);
21114  __ Fmov(d22, 0.5);
21115  __ Fmov(d23, 0.0);
21116  __ Fmov(d24, -0.0);
21117  __ Fmov(d25, kFP64PositiveInfinity);
21118  __ Fmov(d26, kFP64NegativeInfinity);
21119  __ Fabd(d27, d21, d22);
21120  __ Fabd(d28, d23, d24);
21121  __ Fabd(d29, d23, d26);
21122  __ Fabd(d30, d24, d25);
21123  __ Fabd(d31, d24, d26);
21124  END();
21125
21126  RUN();
21127
21128  ASSERT_EQUAL_FP32(1.5, s16);
21129  ASSERT_EQUAL_FP32(0.0, s17);
21130  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
21131  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
21132  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
21133  ASSERT_EQUAL_FP64(1.5, d27);
21134  ASSERT_EQUAL_FP64(0.0, d28);
21135  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
21136  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
21137  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
21138
21139  TEARDOWN();
21140}
21141
21142
21143TEST(neon_faddp_scalar) {
21144  SETUP();
21145
21146  START();
21147  __ Movi(d0, 0x3f80000040000000);
21148  __ Movi(d1, 0xff8000007f800000);
21149  __ Movi(d2, 0x0000000080000000);
21150  __ Faddp(s0, v0.V2S());
21151  __ Faddp(s1, v1.V2S());
21152  __ Faddp(s2, v2.V2S());
21153
21154  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
21155  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
21156  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
21157  __ Faddp(d3, v3.V2D());
21158  __ Faddp(d4, v4.V2D());
21159  __ Faddp(d5, v5.V2D());
21160  END();
21161
21162  RUN();
21163
21164  ASSERT_EQUAL_FP32(3.0, s0);
21165  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
21166  ASSERT_EQUAL_FP32(0.0, s2);
21167  ASSERT_EQUAL_FP64(0.0, d3);
21168  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
21169  ASSERT_EQUAL_FP64(0.0, d5);
21170
21171  TEARDOWN();
21172}
21173
21174
21175TEST(neon_fmaxp_scalar) {
21176  SETUP();
21177
21178  START();
21179  __ Movi(d0, 0x3f80000040000000);
21180  __ Movi(d1, 0xff8000007f800000);
21181  __ Movi(d2, 0x7fc00000ff800000);
21182  __ Fmaxp(s0, v0.V2S());
21183  __ Fmaxp(s1, v1.V2S());
21184  __ Fmaxp(s2, v2.V2S());
21185
21186  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
21187  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
21188  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
21189  __ Fmaxp(d3, v3.V2D());
21190  __ Fmaxp(d4, v4.V2D());
21191  __ Fmaxp(d5, v5.V2D());
21192  END();
21193
21194  RUN();
21195
21196  ASSERT_EQUAL_FP32(2.0, s0);
21197  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
21198  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
21199  ASSERT_EQUAL_FP64(2.0, d3);
21200  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
21201  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
21202
21203  TEARDOWN();
21204}
21205
21206
21207TEST(neon_fmaxnmp_scalar) {
21208  SETUP();
21209
21210  START();
21211  __ Movi(d0, 0x3f80000040000000);
21212  __ Movi(d1, 0xff8000007f800000);
21213  __ Movi(d2, 0x7fc00000ff800000);
21214  __ Fmaxnmp(s0, v0.V2S());
21215  __ Fmaxnmp(s1, v1.V2S());
21216  __ Fmaxnmp(s2, v2.V2S());
21217
21218  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
21219  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
21220  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
21221  __ Fmaxnmp(d3, v3.V2D());
21222  __ Fmaxnmp(d4, v4.V2D());
21223  __ Fmaxnmp(d5, v5.V2D());
21224  END();
21225
21226  RUN();
21227
21228  ASSERT_EQUAL_FP32(2.0, s0);
21229  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
21230  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
21231  ASSERT_EQUAL_FP64(2.0, d3);
21232  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
21233  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
21234
21235  TEARDOWN();
21236}
21237
21238
21239TEST(neon_fminp_scalar) {
21240  SETUP();
21241
21242  START();
21243  __ Movi(d0, 0x3f80000040000000);
21244  __ Movi(d1, 0xff8000007f800000);
21245  __ Movi(d2, 0x7fc00000ff800000);
21246  __ Fminp(s0, v0.V2S());
21247  __ Fminp(s1, v1.V2S());
21248  __ Fminp(s2, v2.V2S());
21249
21250  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
21251  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
21252  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
21253  __ Fminp(d3, v3.V2D());
21254  __ Fminp(d4, v4.V2D());
21255  __ Fminp(d5, v5.V2D());
21256  END();
21257
21258  RUN();
21259
21260  ASSERT_EQUAL_FP32(1.0, s0);
21261  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
21262  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
21263  ASSERT_EQUAL_FP64(1.0, d3);
21264  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
21265  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
21266
21267  TEARDOWN();
21268}
21269
21270
21271TEST(neon_fminnmp_scalar) {
21272  SETUP();
21273
21274  START();
21275  __ Movi(d0, 0x3f80000040000000);
21276  __ Movi(d1, 0xff8000007f800000);
21277  __ Movi(d2, 0x7fc00000ff800000);
21278  __ Fminnmp(s0, v0.V2S());
21279  __ Fminnmp(s1, v1.V2S());
21280  __ Fminnmp(s2, v2.V2S());
21281
21282  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
21283  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
21284  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
21285  __ Fminnmp(d3, v3.V2D());
21286  __ Fminnmp(d4, v4.V2D());
21287  __ Fminnmp(d5, v5.V2D());
21288  END();
21289
21290  RUN();
21291
21292  ASSERT_EQUAL_FP32(1.0, s0);
21293  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
21294  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
21295  ASSERT_EQUAL_FP64(1.0, d3);
21296  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
21297  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
21298
21299  TEARDOWN();
21300}
21301
21302
21303TEST(neon_tbl) {
21304  SETUP();
21305
21306  START();
21307  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
21308  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
21309  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
21310  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);
21311
21312  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
21313  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
21314  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
21315  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);
21316
21317  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
21318  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
21319  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
21320  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
21321  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
21322  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
21323  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
21324  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
21325
21326  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
21327  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
21328  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
21329  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
21330  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
21331  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
21332  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
21333  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
21334
21335  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
21336  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
21337  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
21338  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
21339  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
21340  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
21341  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
21342  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
21343
21344  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
21345  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
21346  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
21347  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
21348  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
21349  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
21350  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
21351  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
21352  END();
21353
21354  RUN();
21355
21356  ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
21357  ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
21358  ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
21359  ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
21360  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
21361  ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
21362  ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
21363  ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);
21364
21365  ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
21366  ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
21367  ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
21368  ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
21369  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
21370  ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
21371  ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
21372  ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
21373
21374  TEARDOWN();
21375}
21376
21377
21378TEST(regress_cmp_shift_imm) {
21379  SETUP();
21380
21381  START();
21382
21383  __ Mov(x0, 0x3d720c8d);
21384  __ Cmp(x0, Operand(0x3d720c8d));
21385
21386  END();
21387  RUN();
21388
21389  ASSERT_EQUAL_NZCV(ZCFlag);
21390
21391  TEARDOWN();
21392}
21393
21394
21395TEST(compute_address) {
21396  SETUP();
21397
21398  START();
21399  int64_t base_address = INT64_C(0x123000000abc);
21400  int64_t reg_offset = INT64_C(0x1087654321);
21401  Register base = x0;
21402  Register offset = x1;
21403
21404  __ Mov(base, base_address);
21405  __ Mov(offset, reg_offset);
21406
21407
21408  __ ComputeAddress(x2, MemOperand(base, 0));
21409  __ ComputeAddress(x3, MemOperand(base, 8));
21410  __ ComputeAddress(x4, MemOperand(base, -100));
21411
21412  __ ComputeAddress(x5, MemOperand(base, offset));
21413  __ ComputeAddress(x6, MemOperand(base, offset, LSL, 2));
21414  __ ComputeAddress(x7, MemOperand(base, offset, LSL, 4));
21415  __ ComputeAddress(x8, MemOperand(base, offset, LSL, 8));
21416
21417  __ ComputeAddress(x9, MemOperand(base, offset, SXTW));
21418  __ ComputeAddress(x10, MemOperand(base, offset, UXTW, 1));
21419  __ ComputeAddress(x11, MemOperand(base, offset, SXTW, 2));
21420  __ ComputeAddress(x12, MemOperand(base, offset, UXTW, 3));
21421
21422  END();
21423
21424  RUN();
21425
21426  ASSERT_EQUAL_64(base_address, base);
21427
21428  ASSERT_EQUAL_64(INT64_C(0x123000000abc), x2);
21429  ASSERT_EQUAL_64(INT64_C(0x123000000ac4), x3);
21430  ASSERT_EQUAL_64(INT64_C(0x123000000a58), x4);
21431
21432  ASSERT_EQUAL_64(INT64_C(0x124087654ddd), x5);
21433  ASSERT_EQUAL_64(INT64_C(0x12721d951740), x6);
21434  ASSERT_EQUAL_64(INT64_C(0x133876543ccc), x7);
21435  ASSERT_EQUAL_64(INT64_C(0x22b765432bbc), x8);
21436
21437  ASSERT_EQUAL_64(INT64_C(0x122f87654ddd), x9);
21438  ASSERT_EQUAL_64(INT64_C(0x12310eca90fe), x10);
21439  ASSERT_EQUAL_64(INT64_C(0x122e1d951740), x11);
21440  ASSERT_EQUAL_64(INT64_C(0x12343b2a23c4), x12);
21441
21442  TEARDOWN();
21443}
21444
21445
21446TEST(far_branch_backward) {
21447  // Test that the MacroAssembler correctly resolves backward branches to labels
21448  // that are outside the immediate range of branch instructions.
21449  // Take into account that backward branches can reach one instruction further
21450  // than forward branches.
21451  const int overflow_size = kInstructionSize +
21452    std::max(Instruction::ImmBranchForwardRange(TestBranchType),
21453             std::max(Instruction::ImmBranchForwardRange(CompareBranchType),
21454                      Instruction::ImmBranchForwardRange(CondBranchType)));
21455
21456  SETUP();
21457  START();
21458
21459  Label done, fail;
21460  Label test_tbz, test_cbz, test_bcond;
21461  Label success_tbz, success_cbz, success_bcond;
21462
21463  __ Mov(x0, 0);
21464  __ Mov(x1, 1);
21465  __ Mov(x10, 0);
21466
21467  __ B(&test_tbz);
21468  __ Bind(&success_tbz);
21469  __ Orr(x0, x0, 1 << 0);
21470  __ B(&test_cbz);
21471  __ Bind(&success_cbz);
21472  __ Orr(x0, x0, 1 << 1);
21473  __ B(&test_bcond);
21474  __ Bind(&success_bcond);
21475  __ Orr(x0, x0, 1 << 2);
21476
21477  __ B(&done);
21478
21479  // Generate enough code to overflow the immediate range of the three types of
21480  // branches below.
21481  for (unsigned i = 0; i < overflow_size / kInstructionSize; ++i) {
21482    if (i % 100 == 0) {
21483      // If we do land in this code, we do not want to execute so many nops
21484      // before reaching the end of test (especially if tracing is activated).
21485      __ B(&fail);
21486    } else {
21487      __ Nop();
21488    }
21489  }
21490  __ B(&fail);
21491
21492  __ Bind(&test_tbz);
21493  __ Tbz(x10, 7, &success_tbz);
21494  __ Bind(&test_cbz);
21495  __ Cbz(x10, &success_cbz);
21496  __ Bind(&test_bcond);
21497  __ Cmp(x10, 0);
21498  __ B(eq, &success_bcond);
21499
21500  // For each out-of-range branch instructions, at least two instructions should
21501  // have been generated.
21502  VIXL_CHECK(masm.SizeOfCodeGeneratedSince(&test_tbz) >= 7 * kInstructionSize);
21503
21504  __ Bind(&fail);
21505  __ Mov(x1, 0);
21506  __ Bind(&done);
21507
21508  END();
21509  RUN();
21510
21511  ASSERT_EQUAL_64(0x7, x0);
21512  ASSERT_EQUAL_64(0x1, x1);
21513
21514  TEARDOWN();
21515}
21516
21517
21518TEST(single_veneer) {
21519  SETUP();
21520  START();
21521
21522  const int max_range = Instruction::ImmBranchForwardRange(TestBranchType);
21523
21524  Label success, fail, done;
21525
21526  __ Mov(x0, 0);
21527  __ Mov(x1, 1);
21528  __ Mov(x10, 0);
21529
21530  __ Tbz(x10, 7, &success);
21531
21532  // Generate enough code to overflow the immediate range of the `tbz`.
21533  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
21534    if (i % 100 == 0) {
21535      // If we do land in this code, we do not want to execute so many nops
21536      // before reaching the end of test (especially if tracing is activated).
21537      __ B(&fail);
21538    } else {
21539      __ Nop();
21540    }
21541  }
21542  __ B(&fail);
21543
21544  __ Bind(&success);
21545  __ Mov(x0, 1);
21546
21547  __ B(&done);
21548  __ Bind(&fail);
21549  __ Mov(x1, 0);
21550  __ Bind(&done);
21551
21552  END();
21553  RUN();
21554
21555  ASSERT_EQUAL_64(1, x0);
21556  ASSERT_EQUAL_64(1, x1);
21557
21558  TEARDOWN();
21559}
21560
21561
21562TEST(simple_veneers) {
21563  // Test that the MacroAssembler correctly emits veneers for forward branches
21564  // to labels that are outside the immediate range of branch instructions.
21565  const int max_range =
21566    std::max(Instruction::ImmBranchForwardRange(TestBranchType),
21567             std::max(Instruction::ImmBranchForwardRange(CompareBranchType),
21568                      Instruction::ImmBranchForwardRange(CondBranchType)));
21569
21570  SETUP();
21571  START();
21572
21573  Label done, fail;
21574  Label test_tbz, test_cbz, test_bcond;
21575  Label success_tbz, success_cbz, success_bcond;
21576
21577  __ Mov(x0, 0);
21578  __ Mov(x1, 1);
21579  __ Mov(x10, 0);
21580
21581  __ Bind(&test_tbz);
21582  __ Tbz(x10, 7, &success_tbz);
21583  __ Bind(&test_cbz);
21584  __ Cbz(x10, &success_cbz);
21585  __ Bind(&test_bcond);
21586  __ Cmp(x10, 0);
21587  __ B(eq, &success_bcond);
21588
21589  // Generate enough code to overflow the immediate range of the three types of
21590  // branches below.
21591  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
21592    if (i % 100 == 0) {
21593      // If we do land in this code, we do not want to execute so many nops
21594      // before reaching the end of test (especially if tracing is activated).
21595      __ B(&fail);
21596    } else {
21597      __ Nop();
21598    }
21599  }
21600  __ B(&fail);
21601
21602  __ Bind(&success_tbz);
21603  __ Orr(x0, x0, 1 << 0);
21604  __ B(&test_cbz);
21605  __ Bind(&success_cbz);
21606  __ Orr(x0, x0, 1 << 1);
21607  __ B(&test_bcond);
21608  __ Bind(&success_bcond);
21609  __ Orr(x0, x0, 1 << 2);
21610
21611  __ B(&done);
21612  __ Bind(&fail);
21613  __ Mov(x1, 0);
21614  __ Bind(&done);
21615
21616  END();
21617  RUN();
21618
21619  ASSERT_EQUAL_64(0x7, x0);
21620  ASSERT_EQUAL_64(0x1, x1);
21621
21622  TEARDOWN();
21623}
21624
21625
21626TEST(veneers_stress) {
21627  SETUP();
21628  START();
21629
21630  // This is a code generation test stressing the emission of veneers. The code
21631  // generated is not executed.
21632
21633  Label target;
21634  const unsigned max_range = Instruction::ImmBranchForwardRange(CondBranchType);
21635  const unsigned iterations =
21636      (max_range + max_range / 4) / (4 * kInstructionSize);
21637  for (unsigned i = 0; i < iterations; i++) {
21638    __ B(&target);
21639    __ B(eq, &target);
21640    __ Cbz(x0, &target);
21641    __ Tbz(x0, 0, &target);
21642  }
21643  __ Bind(&target);
21644
21645  END();
21646  TEARDOWN();
21647}
21648
21649
21650TEST(veneers_two_out_of_range) {
21651  SETUP();
21652  START();
21653
21654  // This is a code generation test. The code generated is not executed.
21655  // Ensure that the MacroAssembler considers unresolved branches to chose when
21656  // a veneer pool should be emitted. We generate two branches that go out of
21657  // range at the same offset. When the MacroAssembler decides to emit the
21658  // veneer pool, the emission of a first veneer should not cause the other
21659  // branch to go out of range.
21660
21661  int range_cbz = Instruction::ImmBranchForwardRange(CompareBranchType);
21662  int range_tbz = Instruction::ImmBranchForwardRange(TestBranchType);
21663  int max_target = static_cast<int>(masm.CursorOffset()) + range_cbz;
21664
21665  Label done;
21666
21667  // We use different labels to prevent the MacroAssembler from sharing veneers.
21668  Label target_cbz, target_tbz;
21669
21670  __ Cbz(x0, &target_cbz);
21671  while (masm.CursorOffset() < max_target - range_tbz) {
21672    __ Nop();
21673  }
21674  __ Tbz(x0, 0, &target_tbz);
21675  while (masm.CursorOffset() < max_target) {
21676    __ Nop();
21677  }
21678
21679  // This additional nop makes the branches go out of range.
21680  __ Nop();
21681
21682  __ Bind(&target_cbz);
21683  __ Bind(&target_tbz);
21684
21685  END();
21686  TEARDOWN();
21687}
21688
21689
21690TEST(veneers_hanging) {
21691  SETUP();
21692  START();
21693
21694  // This is a code generation test. The code generated is not executed.
21695  // Ensure that the MacroAssembler considers unresolved branches to chose when
21696  // a veneer pool should be emitted. This is similar to the
21697  // 'veneers_two_out_of_range' test. We try to trigger the following situation:
21698  //   b.eq label
21699  //   b.eq label
21700  //   ...
21701  //   nop
21702  //   ...
21703  //   cbz x0, label
21704  //   cbz x0, label
21705  //   ...
21706  //   tbz x0, 0 label
21707  //   nop
21708  //   ...
21709  //   nop    <- From here the `b.eq` and `cbz` instructions run out of range,
21710  //             so a literal pool is required.
21711  //   veneer
21712  //   veneer
21713  //   veneer <- The `tbz` runs out of range somewhere in the middle of the
21714  //   veneer    veneer pool.
21715  //   veneer
21716
21717  const int range_bcond = Instruction::ImmBranchForwardRange(CondBranchType);
21718  const int range_cbz = Instruction::ImmBranchForwardRange(CompareBranchType);
21719  const int range_tbz = Instruction::ImmBranchForwardRange(TestBranchType);
21720  const int max_target = static_cast<int>(masm.CursorOffset()) + range_bcond;
21721
21722  Label done;
21723  const int n_bcond = 100;
21724  const int n_cbz = 100;
21725  const int n_tbz = 1;
21726  const int kNTotalBranches = n_bcond + n_cbz + n_tbz;
21727
21728  // We use different labels to prevent the MacroAssembler from sharing veneers.
21729  Label labels[kNTotalBranches];
21730  for (int i = 0; i < kNTotalBranches; i++) {
21731    new(&labels[i]) Label();
21732  }
21733
21734  for (int i = 0; i < n_bcond; i++) {
21735    __ B(eq, &labels[i]);
21736  }
21737
21738  while (masm.CursorOffset() < max_target - range_cbz) {
21739    __ Nop();
21740  }
21741
21742  for (int i = 0; i < n_cbz; i++) {
21743    __ Cbz(x0, &labels[n_bcond + i]);
21744  }
21745
21746  // Ensure the 'tbz' will go out of range after some of the previously
21747  // generated branches.
21748  int margin = (n_bcond / 2) * kInstructionSize;
21749  while (masm.CursorOffset() < max_target - range_tbz + margin) {
21750    __ Nop();
21751  }
21752
21753  __ Tbz(x0, 0, &labels[n_bcond + n_cbz]);
21754
21755  while (masm.CursorOffset() < max_target) {
21756    __ Nop();
21757  }
21758
21759  // This additional nop makes the 'b.eq' and 'cbz' instructions go out of range
21760  // and forces the emission of a veneer pool. The 'tbz' is not yet out of
21761  // range, but will go out of range while veneers are emitted for the other
21762  // branches.
21763  // The MacroAssembler should ensure that veneers are correctly emitted for all
21764  // the branches, including the 'tbz'. Checks will fail if the target of a
21765  // branch is out of range.
21766  __ Nop();
21767
21768  for (int i = 0; i < kNTotalBranches; i++) {
21769    __ Bind(&labels[i]);
21770  }
21771
21772  END();
21773  TEARDOWN();
21774}
21775
21776
21777TEST(collision_literal_veneer_pools) {
21778  SETUP();
21779  START();
21780
21781  // This is a code generation test. The code generated is not executed.
21782
21783  // Make sure the literal pool is empty;
21784  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
21785  ASSERT_LITERAL_POOL_SIZE(0);
21786
21787  // We chose the offsets below to (try to) trigger the following situation:
21788  // buffer offset
21789  //              0:   tbz x0, 0, target_tbz ----------------------------------.
21790  //              4:   nop                                                     |
21791  //                   ...                                                     |
21792  //                   nop                                                     |
21793  //    literal gen:   ldr s0, [pc + ...]   ; load from `pool start + 0`       |
21794  //                   ldr s0, [pc + ...]   ; load from `pool start + 4`       |
21795  //                   ...                                                     |
21796  //                   ldr s0, [pc + ...]                                      |
21797  //     pool start:   floating-point literal (0.1)                            |
21798  //                   floating-point literal (1.1)                            |
21799  //                   ...                                                     |
21800  //                   floating-point literal (<n>.1)     <-----tbz-max-range--'
21801  //                   floating-point literal (<n+1>.1)
21802  //                   ...
21803
21804  const int range_tbz = Instruction::ImmBranchForwardRange(TestBranchType);
21805  const int max_target = static_cast<int>(masm.CursorOffset()) + range_tbz;
21806
21807  const size_t target_literal_pool_size = 100 * kInstructionSize;
21808  const int offset_start_literal_gen =
21809      target_literal_pool_size + target_literal_pool_size / 2;
21810
21811
21812  Label target_tbz;
21813
21814  __ Tbz(x0, 0, &target_tbz);
21815  VIXL_CHECK(masm.NumberOfPotentialVeneers() == 1);
21816  while (masm.CursorOffset() < max_target - offset_start_literal_gen) {
21817    __ Nop();
21818  }
21819  VIXL_CHECK(masm.NumberOfPotentialVeneers() == 1);
21820
21821  for (int i = 0; i < 100; i++) {
21822    // Use a different value to force one literal pool entry per iteration.
21823    __ Ldr(s0, i + 0.1);
21824  }
21825  VIXL_CHECK(masm.LiteralPoolSize() >= target_literal_pool_size);
21826
21827  // Force emission of a literal pool.
21828  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
21829  ASSERT_LITERAL_POOL_SIZE(0);
21830
21831  // The branch should not have gone out of range during the emission of the
21832  // literal pool.
21833  __ Bind(&target_tbz);
21834
21835  VIXL_CHECK(masm.NumberOfPotentialVeneers() == 0);
21836
21837  END();
21838  TEARDOWN();
21839}
21840
21841
21842TEST(ldr_literal_explicit) {
21843  SETUP();
21844
21845  START();
21846  Literal<int64_t> automatically_placed_literal(1, masm.GetLiteralPool());
21847  Literal<int64_t> manually_placed_literal(2);
21848  {
21849    CodeBufferCheckScope scope(&masm,
21850                               kInstructionSize + sizeof(int64_t),
21851                               CodeBufferCheckScope::kCheck,
21852                               CodeBufferCheckScope::kExactSize);
21853    Label over_literal;
21854    __ b(&over_literal);
21855    __ place(&manually_placed_literal);
21856    __ bind(&over_literal);
21857  }
21858  __ Ldr(x1, &manually_placed_literal);
21859  __ Ldr(x2, &automatically_placed_literal);
21860  __ Add(x0, x1, x2);
21861  END();
21862
21863  RUN();
21864
21865  ASSERT_EQUAL_64(3, x0);
21866
21867  TEARDOWN();
21868}
21869
21870
21871TEST(ldr_literal_automatically_placed) {
21872  SETUP();
21873
21874  START();
21875
21876  // We start with an empty literal pool.
21877  ASSERT_LITERAL_POOL_SIZE(0);
21878
21879  // Create a literal that should be placed by the literal pool.
21880  Literal<int64_t> explicit_literal(2, masm.GetLiteralPool());
21881  // It should not appear in the literal pool until its first use.
21882  ASSERT_LITERAL_POOL_SIZE(0);
21883
21884  // Check that using standard literals does not break the use of explicitly
21885  // created literals.
21886  __ Ldr(d1, 1.1);
21887  ASSERT_LITERAL_POOL_SIZE(8);
21888  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
21889  ASSERT_LITERAL_POOL_SIZE(0);
21890
21891  __ Ldr(x2, &explicit_literal);
21892  ASSERT_LITERAL_POOL_SIZE(8);
21893  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
21894  ASSERT_LITERAL_POOL_SIZE(0);
21895
21896  __ Ldr(d3, 3.3);
21897  ASSERT_LITERAL_POOL_SIZE(8);
21898  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
21899  ASSERT_LITERAL_POOL_SIZE(0);
21900
21901  // Re-use our explicitly created literal. It has already been placed, so it
21902  // should not impact the literal pool.
21903  __ Ldr(x4, &explicit_literal);
21904  ASSERT_LITERAL_POOL_SIZE(0);
21905
21906  END();
21907
21908  RUN();
21909
21910  ASSERT_EQUAL_FP64(1.1, d1);
21911  ASSERT_EQUAL_64(2, x2);
21912  ASSERT_EQUAL_FP64(3.3, d3);
21913  ASSERT_EQUAL_64(2, x4);
21914
21915  TEARDOWN();
21916}
21917
21918
21919TEST(literal_update_overwrite) {
21920  SETUP();
21921
21922  START();
21923
21924  ASSERT_LITERAL_POOL_SIZE(0);
21925  LiteralPool* literal_pool = masm.GetLiteralPool();
21926
21927  Literal<int32_t> lit_32_update_before_pool(0xbad, literal_pool);
21928  Literal<int32_t> lit_32_update_after_pool(0xbad, literal_pool);
21929  Literal<int64_t> lit_64_update_before_pool(0xbad, literal_pool);
21930  Literal<int64_t> lit_64_update_after_pool(0xbad, literal_pool);
21931
21932  ASSERT_LITERAL_POOL_SIZE(0);
21933
21934  lit_32_update_before_pool.UpdateValue(32);
21935  lit_64_update_before_pool.UpdateValue(64);
21936
21937  __ Ldr(w1, &lit_32_update_before_pool);
21938  __ Ldr(x2, &lit_64_update_before_pool);
21939  __ Ldr(w3, &lit_32_update_after_pool);
21940  __ Ldr(x4, &lit_64_update_after_pool);
21941
21942  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
21943
21944  VIXL_ASSERT(lit_32_update_after_pool.IsPlaced());
21945  VIXL_ASSERT(lit_64_update_after_pool.IsPlaced());
21946  lit_32_update_after_pool.UpdateValue(128, &masm);
21947  lit_64_update_after_pool.UpdateValue(256, &masm);
21948
21949  END();
21950
21951  RUN();
21952
21953  ASSERT_EQUAL_64(32, x1);
21954  ASSERT_EQUAL_64(64, x2);
21955  ASSERT_EQUAL_64(128, x3);
21956  ASSERT_EQUAL_64(256, x4);
21957
21958  TEARDOWN();
21959}
21960
21961
21962TEST(literal_deletion_policies) {
21963  SETUP();
21964
21965  START();
21966
21967  // We cannot check exactly when the deletion of the literals occur, but we
21968  // check that usage of the deletion policies is not broken.
21969
21970  ASSERT_LITERAL_POOL_SIZE(0);
21971  LiteralPool* literal_pool = masm.GetLiteralPool();
21972
21973  Literal<int32_t> lit_manual(0xbad, literal_pool);
21974  Literal<int32_t>* lit_deleted_on_placement =
21975      new Literal<int32_t>(0xbad,
21976                           literal_pool,
21977                           RawLiteral::kDeletedOnPlacementByPool);
21978  Literal<int32_t>* lit_deleted_on_pool_destruction =
21979      new Literal<int32_t>(0xbad,
21980                           literal_pool,
21981                           RawLiteral::kDeletedOnPoolDestruction);
21982
21983  ASSERT_LITERAL_POOL_SIZE(0);
21984
21985  lit_manual.UpdateValue(32);
21986  lit_deleted_on_placement->UpdateValue(64);
21987
21988  __ Ldr(w1, &lit_manual);
21989  __ Ldr(w2, lit_deleted_on_placement);
21990  __ Ldr(w3, lit_deleted_on_pool_destruction);
21991
21992  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
21993
21994  VIXL_ASSERT(lit_manual.IsPlaced());
21995  VIXL_ASSERT(lit_deleted_on_pool_destruction->IsPlaced());
21996  lit_deleted_on_pool_destruction->UpdateValue(128, &masm);
21997
21998  END();
21999
22000  RUN();
22001
22002  ASSERT_EQUAL_64(32, x1);
22003  ASSERT_EQUAL_64(64, x2);
22004  ASSERT_EQUAL_64(128, x3);
22005
22006  TEARDOWN();
22007}
22008
22009
22010}  // namespace vixl
22011