1
2//===- subzero/unittest/unittest/AssemblerX8664/TestUtil.h ------*- C++ -*-===//
3//
4//                        The Subzero Code Generator
5//
6// This file is distributed under the University of Illinois Open Source
7// License. See LICENSE.TXT for details.
8//
9//===----------------------------------------------------------------------===//
10//
11// Utility classes for testing the X8664 Assembler.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef ASSEMBLERX8664_TESTUTIL_H_
16#define ASSEMBLERX8664_TESTUTIL_H_
17
18#include "IceAssemblerX8664.h"
19
20#include "gtest/gtest.h"
21
22#if defined(__unix__)
23#include <sys/mman.h>
24#elif defined(_WIN32)
25#define NOMINMAX
26#include <Windows.h>
27#else
28#error "Platform unsupported"
29#endif
30
31#include <cassert>
32
33namespace Ice {
34namespace X8664 {
35namespace Test {
36
37class AssemblerX8664TestBase : public ::testing::Test {
38protected:
39  using Address = AssemblerX8664::Traits::Address;
40  using Cond = AssemblerX8664::Traits::Cond;
41  using GPRRegister = AssemblerX8664::Traits::GPRRegister;
42  using ByteRegister = AssemblerX8664::Traits::ByteRegister;
43  using Traits = AssemblerX8664::Traits;
44  using XmmRegister = AssemblerX8664::Traits::XmmRegister;
45
46// The following are "nicknames" for all possible GPRs in x86-64. With those, we
47// can use, e.g.,
48//
49//  Encoded_GPR_al()
50//
51// instead of GPRRegister::Encoded_Reg_eax for 8 bit operands. They also
52// introduce "regular" nicknames for legacy x86-32 register (e.g., eax becomes
53// r1; esp, r0).
54#define LegacyRegAliases(NewName, Name64, Name32, Name16, Name8)               \
55  static constexpr GPRRegister Encoded_GPR_##NewName() {                       \
56    return GPRRegister::Encoded_Reg_##Name32;                                  \
57  }                                                                            \
58  static constexpr GPRRegister Encoded_GPR_##NewName##q() {                    \
59    return GPRRegister::Encoded_Reg_##Name32;                                  \
60  }                                                                            \
61  static constexpr GPRRegister Encoded_GPR_##NewName##d() {                    \
62    return GPRRegister::Encoded_Reg_##Name32;                                  \
63  }                                                                            \
64  static constexpr GPRRegister Encoded_GPR_##NewName##w() {                    \
65    return GPRRegister::Encoded_Reg_##Name32;                                  \
66  }                                                                            \
67  static constexpr GPRRegister Encoded_GPR_##NewName##l() {                    \
68    return GPRRegister::Encoded_Reg_##Name32;                                  \
69  }                                                                            \
70  static constexpr ByteRegister Encoded_Bytereg_##NewName() {                  \
71    return ByteRegister::Encoded_8_Reg_##Name8;                                \
72  }                                                                            \
73  static constexpr GPRRegister Encoded_GPR_##Name64() {                        \
74    return GPRRegister::Encoded_Reg_##Name32;                                  \
75  }                                                                            \
76  static constexpr GPRRegister Encoded_GPR_##Name32() {                        \
77    return GPRRegister::Encoded_Reg_##Name32;                                  \
78  }                                                                            \
79  static constexpr GPRRegister Encoded_GPR_##Name16() {                        \
80    return GPRRegister::Encoded_Reg_##Name32;                                  \
81  }                                                                            \
82  static constexpr GPRRegister Encoded_GPR_##Name8() {                         \
83    return GPRRegister::Encoded_Reg_##Name32;                                  \
84  }
85#define NewRegAliases(Name)                                                    \
86  static constexpr GPRRegister Encoded_GPR_##Name() {                          \
87    return GPRRegister::Encoded_Reg_##Name##d;                                 \
88  }                                                                            \
89  static constexpr GPRRegister Encoded_GPR_##Name##q() {                       \
90    return GPRRegister::Encoded_Reg_##Name##d;                                 \
91  }                                                                            \
92  static constexpr GPRRegister Encoded_GPR_##Name##d() {                       \
93    return GPRRegister::Encoded_Reg_##Name##d;                                 \
94  }                                                                            \
95  static constexpr GPRRegister Encoded_GPR_##Name##w() {                       \
96    return GPRRegister::Encoded_Reg_##Name##d;                                 \
97  }                                                                            \
98  static constexpr GPRRegister Encoded_GPR_##Name##l() {                       \
99    return GPRRegister::Encoded_Reg_##Name##d;                                 \
100  }                                                                            \
101  static constexpr ByteRegister Encoded_Bytereg_##Name() {                     \
102    return ByteRegister::Encoded_8_Reg_##Name##l;                              \
103  }
104#define XmmRegAliases(Name)                                                    \
105  static constexpr XmmRegister Encoded_Xmm_##Name() {                          \
106    return XmmRegister::Encoded_Reg_##Name;                                    \
107  }
108  LegacyRegAliases(r0, rsp, esp, sp, spl);
109  LegacyRegAliases(r1, rax, eax, ax, al);
110  LegacyRegAliases(r2, rbx, ebx, bx, bl);
111  LegacyRegAliases(r3, rcx, ecx, cx, cl);
112  LegacyRegAliases(r4, rdx, edx, dx, dl);
113  LegacyRegAliases(r5, rbp, ebp, bp, bpl);
114  LegacyRegAliases(r6, rsi, esi, si, sil);
115  LegacyRegAliases(r7, rdi, edi, di, dil);
116  NewRegAliases(r8);
117  NewRegAliases(r9);
118  NewRegAliases(r10);
119  NewRegAliases(r11);
120  NewRegAliases(r12);
121  NewRegAliases(r13);
122  NewRegAliases(r14);
123  NewRegAliases(r15);
124  XmmRegAliases(xmm0);
125  XmmRegAliases(xmm1);
126  XmmRegAliases(xmm2);
127  XmmRegAliases(xmm3);
128  XmmRegAliases(xmm4);
129  XmmRegAliases(xmm5);
130  XmmRegAliases(xmm6);
131  XmmRegAliases(xmm7);
132  XmmRegAliases(xmm8);
133  XmmRegAliases(xmm9);
134  XmmRegAliases(xmm10);
135  XmmRegAliases(xmm11);
136  XmmRegAliases(xmm12);
137  XmmRegAliases(xmm13);
138  XmmRegAliases(xmm14);
139  XmmRegAliases(xmm15);
140#undef XmmRegAliases
141#undef NewRegAliases
142#undef LegacyRegAliases
143
144  AssemblerX8664TestBase() { reset(); }
145
146  void reset() { Assembler = makeUnique<AssemblerX8664>(); }
147
148  AssemblerX8664 *assembler() const { return Assembler.get(); }
149
150  size_t codeBytesSize() const { return Assembler->getBufferView().size(); }
151
152  const uint8_t *codeBytes() const {
153    return static_cast<const uint8_t *>(
154        static_cast<const void *>(Assembler->getBufferView().data()));
155  }
156
157private:
158  std::unique_ptr<AssemblerX8664> Assembler;
159};
160
161// __ is a helper macro. It allows test cases to emit X8664 assembly
162// instructions with
163//
164//   __ mov(GPRRegister::Reg_Eax, 1);
165//   __ ret();
166//
167// and so on. The idea of having this was "stolen" from dart's unit tests.
168#define __ (this->assembler())->
169
// AssemblerX8664LowLevelTest verifies that the "basic" instructions the tests
// rely on are encoded correctly. Therefore, instead of executing the assembled
// code, these tests will verify that the assembled bytes are sane.
173class AssemblerX8664LowLevelTest : public AssemblerX8664TestBase {
174protected:
175  // verifyBytes is a template helper that takes a Buffer, and a variable number
176  // of bytes. As the name indicates, it is used to verify the bytes for an
177  // instruction encoding.
178  template <int N, int I> static bool verifyBytes(const uint8_t *) {
179    static_assert(I == N, "Invalid template instantiation.");
180    return true;
181  }
182
183  template <int N, int I = 0, typename... Args>
184  static bool verifyBytes(const uint8_t *Buffer, uint8_t Byte,
185                          Args... OtherBytes) {
186    static_assert(I < N, "Invalid template instantiation.");
187    EXPECT_EQ(Byte, Buffer[I]) << "Byte " << (I + 1) << " of " << N;
188    return verifyBytes<N, I + 1>(Buffer, OtherBytes...) && Buffer[I] == Byte;
189  }
190};
191
192// After these tests we should have a sane environment; we know the following
193// work:
194//
195//  (*) zeroing eax, ebx, ecx, edx, edi, and esi;
196//  (*) call $4 instruction (used for ip materialization);
197//  (*) register push and pop;
198//  (*) cmp reg, reg; and
199//  (*) returning from functions.
200//
201// We can now dive into testing each emitting method in AssemblerX8664. Each
202// test will emit some instructions for performing the test. The assembled
// instructions will operate in a "safe" environment. All x86-64 registers are
// spilled to the program stack, and the registers are then zeroed out, with the
// exception of %rsp and %r9.
206//
// The jitted code and the unittest code will share the same stack. Therefore,
// each test needs to ensure that it does not leave behind anything it pushed
// on the stack.
210//
211// %r9 is initialized with a pointer for rIP-based addressing. This pointer is
212// used for position-independent access to a scratchpad area for use in tests.
213// In theory we could use rip-based addressing, but in practice that would
214// require creating fixups, which would, in turn, require creating a global
215// context. We therefore rely on the same technique used for pic code in x86-32
216// (i.e., IP materialization). Upon a test start up, a call(NextInstruction) is
217// executed. We then pop the return address from the stack, and use it for pic
218// addressing.
219//
220// The jitted code will look like the following:
221//
222// test:
223//       push   %r9
224//       call   test$materialize_ip
225// test$materialize_ip:                           <<------- %r9 will point here
226//       pop    %r9
227//       push   %rax
228//       push   %rbx
229//       push   %rcx
230//       push   %rdx
231//       push   %rbp
232//       push   %rdi
233//       push   %rsi
234//       push   %r8
235//       push   %r10
236//       push   %r11
237//       push   %r12
238//       push   %r13
239//       push   %r14
240//       push   %r15
241//       mov    $0, %rax
242//       mov    $0, %rbx
243//       mov    $0, %rcx
244//       mov    $0, %rdx
245//       mov    $0, %rbp
246//       mov    $0, %rdi
247//       mov    $0, %rsi
248//       mov    $0, %r8
249//       mov    $0, %r10
250//       mov    $0, %r11
251//       mov    $0, %r12
252//       mov    $0, %r13
253//       mov    $0, %r14
254//       mov    $0, %r15
255//
256//       << test code goes here >>
257//
//       mov    %rax, {  0 + $ScratchpadOffset}(%r9)
//       mov    %rbx, {  8 + $ScratchpadOffset}(%r9)
//       mov    %rcx, { 16 + $ScratchpadOffset}(%r9)
//       mov    %rdx, { 24 + $ScratchpadOffset}(%r9)
//       mov    %rdi, { 32 + $ScratchpadOffset}(%r9)
//       mov    %rsi, { 40 + $ScratchpadOffset}(%r9)
//       mov    %rbp, { 48 + $ScratchpadOffset}(%r9)
//       mov    %rsp, { 56 + $ScratchpadOffset}(%r9)
//       mov    %r8,  { 64 + $ScratchpadOffset}(%r9)
//       mov    %r9,  { 72 + $ScratchpadOffset}(%r9)
//       mov    %r10, { 80 + $ScratchpadOffset}(%r9)
//       mov    %r11, { 88 + $ScratchpadOffset}(%r9)
//       mov    %r12, { 96 + $ScratchpadOffset}(%r9)
//       mov    %r13, {104 + $ScratchpadOffset}(%r9)
//       mov    %r14, {112 + $ScratchpadOffset}(%r9)
//       mov    %r15, {120 + $ScratchpadOffset}(%r9)
//       movups %xmm0,  {128 + $ScratchpadOffset}(%r9)
//       movups %xmm1,  {144 + $ScratchpadOffset}(%r9)
//       movups %xmm2,  {160 + $ScratchpadOffset}(%r9)
//       movups %xmm3,  {176 + $ScratchpadOffset}(%r9)
//       movups %xmm4,  {192 + $ScratchpadOffset}(%r9)
//       movups %xmm5,  {208 + $ScratchpadOffset}(%r9)
//       movups %xmm6,  {224 + $ScratchpadOffset}(%r9)
//       movups %xmm7,  {240 + $ScratchpadOffset}(%r9)
//       movups %xmm8,  {256 + $ScratchpadOffset}(%r9)
//       movups %xmm9,  {272 + $ScratchpadOffset}(%r9)
//       movups %xmm10, {288 + $ScratchpadOffset}(%r9)
//       movups %xmm11, {304 + $ScratchpadOffset}(%r9)
//       movups %xmm12, {320 + $ScratchpadOffset}(%r9)
//       movups %xmm13, {336 + $ScratchpadOffset}(%r9)
//       movups %xmm14, {352 + $ScratchpadOffset}(%r9)
//       movups %xmm15, {368 + $ScratchpadOffset}(%r9)
290//
291//       pop    %r15
292//       pop    %r14
293//       pop    %r13
294//       pop    %r12
295//       pop    %r11
296//       pop    %r10
297//       pop    %r8
298//       pop    %rsi
299//       pop    %rdi
300//       pop    %rbp
301//       pop    %rdx
302//       pop    %rcx
303//       pop    %rbx
304//       pop    %rax
305//       pop    %r9
306//       ret
307//
308//      << ... >>
309//
// scratchpad:                              <<------- accessed via $Offset(%r9)
311//
312//      << test scratch area >>
313//
314// TODO(jpp): test the
315//
//    mov %reg, $Offset(%r9)
//    movups %xmm, $Offset(%r9)
318//
319// encodings using the low level assembler test ensuring that the register
320// values can be written to the scratchpad area.
321//
// r9 was deliberately chosen so that every instruction accessing memory would
// fail if the rex prefix was not emitted for it.
324class AssemblerX8664Test : public AssemblerX8664TestBase {
325protected:
326  // Dqword is used to represent 128-bit data types. The Dqword's contents are
327  // the same as the contents read from memory. Tests can then use the union
328  // members to verify the tests' outputs.
329  //
330  // NOTE: We want sizeof(Dqword) == sizeof(uint64_t) * 2. In other words, we
331  // want Dqword's contents to be **exactly** what the memory contents were so
332  // that we can do, e.g.,
333  //
334  // ...
335  // float Ret[4];
336  // // populate Ret
337  // return *reinterpret_cast<Dqword *>(&Ret);
338  //
  // While being an ugly hack, this kind of return statement is used
  // extensively in the PackedArith (see below) class.
341  union Dqword {
342    template <typename T0, typename T1, typename T2, typename T3,
343              typename = typename std::enable_if<
344                  std::is_floating_point<T0>::value>::type>
345    Dqword(T0 F0, T1 F1, T2 F2, T3 F3) {
346      F32[0] = F0;
347      F32[1] = F1;
348      F32[2] = F2;
349      F32[3] = F3;
350    }
351
352    template <typename T>
353    Dqword(typename std::enable_if<std::is_same<T, int32_t>::value, T>::type I0,
354           T I1, T I2, T I3) {
355      I32[0] = I0;
356      I32[1] = I1;
357      I32[2] = I2;
358      I32[3] = I3;
359    }
360
361    template <typename T>
362    Dqword(typename std::enable_if<std::is_same<T, uint64_t>::value, T>::type
363               U64_0,
364           T U64_1) {
365      U64[0] = U64_0;
366      U64[1] = U64_1;
367    }
368
369    template <typename T>
370    Dqword(typename std::enable_if<std::is_same<T, double>::value, T>::type D0,
371           T D1) {
372      F64[0] = D0;
373      F64[1] = D1;
374    }
375
376    bool operator==(const Dqword &Rhs) const {
377      return std::memcmp(this, &Rhs, sizeof(*this)) == 0;
378    }
379
380    double F64[2];
381    uint64_t U64[2];
382    int64_t I64[2];
383
384    float F32[4];
385    uint32_t U32[4];
386    int32_t I32[4];
387
388    uint16_t U16[8];
389    int16_t I16[8];
390
391    uint8_t U8[16];
392    int8_t I8[16];
393
394  private:
395    Dqword() = delete;
396  };
397
398  // As stated, we want this condition to hold, so we assert.
399  static_assert(sizeof(Dqword) == 2 * sizeof(uint64_t),
400                "Dqword has the wrong size.");
401
  // PackedArith is an interface provider for Dqwords. PackedArith's C argument
  // is the underlying Dqword's type, which is then used so that we can define
  // operators in terms of C++ operators on the underlying elements' type.
405  template <typename C> class PackedArith {
406  public:
407    static constexpr uint32_t N = sizeof(Dqword) / sizeof(C);
408    static_assert(N * sizeof(C) == sizeof(Dqword),
409                  "Invalid template paramenter.");
410    static_assert((N & 1) == 0, "N should be divisible by 2");
411
412#define DefinePackedComparisonOperator(Op)                                     \
413  template <typename Container = C, int Size = N>                              \
414  typename std::enable_if<std::is_floating_point<Container>::value,            \
415                          Dqword>::type                                        \
416  operator Op(const Dqword &Rhs) const {                                       \
417    using ElemType =                                                           \
418        typename std::conditional<std::is_same<float, Container>::value,       \
419                                  int32_t, int64_t>::type;                     \
420    static_assert(sizeof(ElemType) == sizeof(Container),                       \
421                  "Check ElemType definition.");                               \
422    const ElemType *const RhsPtr =                                             \
423        reinterpret_cast<const ElemType *const>(&Rhs);                         \
424    const ElemType *const LhsPtr =                                             \
425        reinterpret_cast<const ElemType *const>(&Lhs);                         \
426    ElemType Ret[N];                                                           \
427    for (uint32_t i = 0; i < N; ++i) {                                         \
428      Ret[i] = (LhsPtr[i] Op RhsPtr[i]) ? -1 : 0;                              \
429    }                                                                          \
430    return *reinterpret_cast<Dqword *>(&Ret);                                  \
431  }
432
433    DefinePackedComparisonOperator(< );
434    DefinePackedComparisonOperator(<= );
435    DefinePackedComparisonOperator(> );
436    DefinePackedComparisonOperator(>= );
437    DefinePackedComparisonOperator(== );
438    DefinePackedComparisonOperator(!= );
439
440#undef DefinePackedComparisonOperator
441
442#define DefinePackedOrdUnordComparisonOperator(Op, Ordered)                    \
443  template <typename Container = C, int Size = N>                              \
444  typename std::enable_if<std::is_floating_point<Container>::value,            \
445                          Dqword>::type                                        \
446  Op(const Dqword &Rhs) const {                                                \
447    using ElemType =                                                           \
448        typename std::conditional<std::is_same<float, Container>::value,       \
449                                  int32_t, int64_t>::type;                     \
450    static_assert(sizeof(ElemType) == sizeof(Container),                       \
451                  "Check ElemType definition.");                               \
452    const Container *const RhsPtr =                                            \
453        reinterpret_cast<const Container *const>(&Rhs);                        \
454    const Container *const LhsPtr =                                            \
455        reinterpret_cast<const Container *const>(&Lhs);                        \
456    ElemType Ret[N];                                                           \
457    for (uint32_t i = 0; i < N; ++i) {                                         \
458      Ret[i] = (!(LhsPtr[i] == LhsPtr[i]) || !(RhsPtr[i] == RhsPtr[i])) !=     \
459                       (Ordered)                                               \
460                   ? -1                                                        \
461                   : 0;                                                        \
462    }                                                                          \
463    return *reinterpret_cast<Dqword *>(&Ret);                                  \
464  }
465
466    DefinePackedOrdUnordComparisonOperator(ord, true);
467    DefinePackedOrdUnordComparisonOperator(unord, false);
468#undef DefinePackedOrdUnordComparisonOperator
469
470#define DefinePackedArithOperator(Op, RhsIndexChanges, NeedsInt)               \
471  template <typename Container = C, int Size = N>                              \
472  Dqword operator Op(const Dqword &Rhs) const {                                \
473    using ElemTypeForFp = typename std::conditional<                           \
474        !(NeedsInt), Container,                                                \
475        typename std::conditional<                                             \
476            std::is_same<Container, float>::value, uint32_t,                   \
477            typename std::conditional<std::is_same<Container, double>::value,  \
478                                      uint64_t, void>::type>::type>::type;     \
479    using ElemType =                                                           \
480        typename std::conditional<std::is_integral<Container>::value,          \
481                                  Container, ElemTypeForFp>::type;             \
482    static_assert(!std::is_same<void, ElemType>::value,                        \
483                  "Check ElemType definition.");                               \
484    const ElemType *const RhsPtr =                                             \
485        reinterpret_cast<const ElemType *const>(&Rhs);                         \
486    const ElemType *const LhsPtr =                                             \
487        reinterpret_cast<const ElemType *const>(&Lhs);                         \
488    ElemType Ret[N];                                                           \
489    for (uint32_t i = 0; i < N; ++i) {                                         \
490      Ret[i] = LhsPtr[i] Op RhsPtr[(RhsIndexChanges) ? i : 0];                 \
491    }                                                                          \
492    return *reinterpret_cast<Dqword *>(&Ret);                                  \
493  }
494
495    DefinePackedArithOperator(>>, false, true);
496    DefinePackedArithOperator(<<, false, true);
497    DefinePackedArithOperator(+, true, false);
498    DefinePackedArithOperator(-, true, false);
499    DefinePackedArithOperator(/, true, false);
500    DefinePackedArithOperator(&, true, true);
501    DefinePackedArithOperator(|, true, true);
502    DefinePackedArithOperator (^, true, true);
503
504#undef DefinePackedArithOperator
505
506#define DefinePackedArithShiftImm(Op)                                          \
507  template <typename Container = C, int Size = N>                              \
508  Dqword operator Op(uint8_t imm) const {                                      \
509    const Container *const LhsPtr =                                            \
510        reinterpret_cast<const Container *const>(&Lhs);                        \
511    Container Ret[N];                                                          \
512    for (uint32_t i = 0; i < N; ++i) {                                         \
513      Ret[i] = LhsPtr[i] Op imm;                                               \
514    }                                                                          \
515    return *reinterpret_cast<Dqword *>(&Ret);                                  \
516  }
517
518    DefinePackedArithShiftImm(>> );
519    DefinePackedArithShiftImm(<< );
520
521#undef DefinePackedArithShiftImm
522
523    template <typename Container = C, int Size = N>
524    typename std::enable_if<std::is_signed<Container>::value ||
525                                std::is_floating_point<Container>::value,
526                            Dqword>::type
527    operator*(const Dqword &Rhs) const {
528      static_assert((std::is_integral<Container>::value &&
529                     sizeof(Container) < sizeof(uint64_t)) ||
530                        std::is_floating_point<Container>::value,
531                    "* is only defined for i(8|16|32), and fp types.");
532
533      const Container *const RhsPtr =
534          reinterpret_cast<const Container *const>(&Rhs);
535      const Container *const LhsPtr =
536          reinterpret_cast<const Container *const>(&Lhs);
537      Container Ret[Size];
538      for (uint32_t i = 0; i < Size; ++i) {
539        Ret[i] = LhsPtr[i] * RhsPtr[i];
540      }
541      return *reinterpret_cast<Dqword *>(&Ret);
542    }
543
544    template <typename Container = C, int Size = N,
545              typename = typename std::enable_if<
546                  !std::is_signed<Container>::value>::type>
547    Dqword operator*(const Dqword &Rhs) const {
548      static_assert(std::is_integral<Container>::value &&
549                        sizeof(Container) < sizeof(uint64_t),
550                    "* is only defined for ui(8|16|32)");
551      using NextType = typename std::conditional<
552          sizeof(Container) == 1, uint16_t,
553          typename std::conditional<sizeof(Container) == 2, uint32_t,
554                                    uint64_t>::type>::type;
555      static_assert(sizeof(Container) * 2 == sizeof(NextType),
556                    "Unexpected size");
557
558      const Container *const RhsPtr =
559          reinterpret_cast<const Container *const>(&Rhs);
560      const Container *const LhsPtr =
561          reinterpret_cast<const Container *const>(&Lhs);
562      NextType Ret[Size / 2];
563      for (uint32_t i = 0; i < Size; i += 2) {
564        Ret[i / 2] =
565            static_cast<NextType>(LhsPtr[i]) * static_cast<NextType>(RhsPtr[i]);
566      }
567      return *reinterpret_cast<Dqword *>(&Ret);
568    }
569
570    template <typename Container = C, int Size = N>
571    PackedArith<Container> operator~() const {
572      const Container *const LhsPtr =
573          reinterpret_cast<const Container *const>(&Lhs);
574      Container Ret[Size];
575      for (uint32_t i = 0; i < Size; ++i) {
576        Ret[i] = ~LhsPtr[i];
577      }
578      return PackedArith<Container>(*reinterpret_cast<Dqword *>(&Ret));
579    }
580
581#define MinMaxOperations(Name, Suffix)                                         \
582  template <typename Container = C, int Size = N>                              \
583  Dqword Name##Suffix(const Dqword &Rhs) const {                               \
584    static_assert(std::is_floating_point<Container>::value,                    \
585                  #Name #Suffix "ps is only available for fp.");               \
586    const Container *const RhsPtr =                                            \
587        reinterpret_cast<const Container *const>(&Rhs);                        \
588    const Container *const LhsPtr =                                            \
589        reinterpret_cast<const Container *const>(&Lhs);                        \
590    Container Ret[Size];                                                       \
591    for (uint32_t i = 0; i < Size; ++i) {                                      \
592      Ret[i] = std::Name(LhsPtr[i], RhsPtr[i]);                                \
593    }                                                                          \
594    return *reinterpret_cast<Dqword *>(&Ret);                                  \
595  }
596
597    MinMaxOperations(max, ps);
598    MinMaxOperations(max, pd);
599    MinMaxOperations(min, ps);
600    MinMaxOperations(min, pd);
601#undef MinMaxOperations
602
603    template <typename Container = C, int Size = N>
604    Dqword blendWith(const Dqword &Rhs, const Dqword &Mask) const {
605      using MaskType = typename std::conditional<
606          sizeof(Container) == 1, int8_t,
607          typename std::conditional<sizeof(Container) == 2, int16_t,
608                                    int32_t>::type>::type;
609      static_assert(sizeof(MaskType) == sizeof(Container),
610                    "MaskType has the wrong size.");
611      const Container *const RhsPtr =
612          reinterpret_cast<const Container *const>(&Rhs);
613      const Container *const LhsPtr =
614          reinterpret_cast<const Container *const>(&Lhs);
615      const MaskType *const MaskPtr =
616          reinterpret_cast<const MaskType *const>(&Mask);
617      Container Ret[Size];
618      for (int i = 0; i < Size; ++i) {
619        Ret[i] = ((MaskPtr[i] < 0) ? RhsPtr : LhsPtr)[i];
620      }
621      return *reinterpret_cast<Dqword *>(&Ret);
622    }
623
624  private:
625    // The AssemblerX8664Test class needs to be a friend so that it can create
626    // PackedArith objects (see below.)
627    friend class AssemblerX8664Test;
628
629    explicit PackedArith(const Dqword &MyLhs) : Lhs(MyLhs) {}
630
631    // Lhs can't be a & because operator~ returns a temporary object that needs
632    // access to its own Dqword.
633    const Dqword Lhs;
634  };
635
636  // Named constructor for PackedArith objects.
637  template <typename C> static PackedArith<C> packedAs(const Dqword &D) {
638    return PackedArith<C>(D);
639  }
640
641  AssemblerX8664Test() { reset(); }
642
643  void reset() {
644    AssemblerX8664TestBase::reset();
645
646    NeedsEpilogue = true;
647    // These dwords are allocated for saving the GPR state after the jitted code
648    // runs.
649    NumAllocatedDwords = AssembledTest::ScratchpadSlots;
650    addPrologue();
651  }
652
653  // AssembledTest is a wrapper around a PROT_EXEC mmap'ed buffer. This buffer
654  // contains both the test code as well as prologue/epilogue, and the
655  // scratchpad area that tests may use -- all tests use this scratchpad area
656  // for storing the processor's registers after the tests executed. This class
657  // also exposes helper methods for reading the register state after test
658  // execution, as well as for reading the scratchpad area.
659  class AssembledTest {
660    AssembledTest() = delete;
661    AssembledTest(const AssembledTest &) = delete;
662    AssembledTest &operator=(const AssembledTest &) = delete;
663
664  public:
665    static constexpr uint32_t MaximumCodeSize = 1 << 20;
666    static constexpr uint32_t raxSlot() { return 0; }
667    static constexpr uint32_t rbxSlot() { return 2; }
668    static constexpr uint32_t rcxSlot() { return 4; }
669    static constexpr uint32_t rdxSlot() { return 6; }
670    static constexpr uint32_t rdiSlot() { return 8; }
671    static constexpr uint32_t rsiSlot() { return 10; }
672    static constexpr uint32_t rbpSlot() { return 12; }
673    static constexpr uint32_t rspSlot() { return 14; }
674    static constexpr uint32_t r8Slot() { return 16; }
675    static constexpr uint32_t r9Slot() { return 18; }
676    static constexpr uint32_t r10Slot() { return 20; }
677    static constexpr uint32_t r11Slot() { return 22; }
678    static constexpr uint32_t r12Slot() { return 24; }
679    static constexpr uint32_t r13Slot() { return 26; }
680    static constexpr uint32_t r14Slot() { return 28; }
681    static constexpr uint32_t r15Slot() { return 30; }
682
683    // save 4 dwords for each xmm registers.
684    static constexpr uint32_t xmm0Slot() { return 32; }
685    static constexpr uint32_t xmm1Slot() { return 36; }
686    static constexpr uint32_t xmm2Slot() { return 40; }
687    static constexpr uint32_t xmm3Slot() { return 44; }
688    static constexpr uint32_t xmm4Slot() { return 48; }
689    static constexpr uint32_t xmm5Slot() { return 52; }
690    static constexpr uint32_t xmm6Slot() { return 56; }
691    static constexpr uint32_t xmm7Slot() { return 60; }
692    static constexpr uint32_t xmm8Slot() { return 64; }
693    static constexpr uint32_t xmm9Slot() { return 68; }
694    static constexpr uint32_t xmm10Slot() { return 72; }
695    static constexpr uint32_t xmm11Slot() { return 76; }
696    static constexpr uint32_t xmm12Slot() { return 80; }
697    static constexpr uint32_t xmm13Slot() { return 84; }
698    static constexpr uint32_t xmm14Slot() { return 88; }
699    static constexpr uint32_t xmm15Slot() { return 92; }
700
701    static constexpr uint32_t ScratchpadSlots = 96;
702
703    AssembledTest(const uint8_t *Data, const size_t MySize,
704                  const size_t ExtraStorageDwords)
705        : Size(MaximumCodeSize + 4 * ExtraStorageDwords) {
706      // MaxCodeSize is needed because EXPECT_LT needs a symbol with a name --
707      // probably a compiler bug?
708      uint32_t MaxCodeSize = MaximumCodeSize;
709      EXPECT_LT(MySize, MaxCodeSize);
710      assert(MySize < MaximumCodeSize);
711
712#if defined(__unix__)
713      ExecutableData = mmap(nullptr, Size, PROT_WRITE | PROT_READ | PROT_EXEC,
714                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
715      EXPECT_NE(MAP_FAILED, ExecutableData) << strerror(errno);
716      assert(MAP_FAILED != ExecutableData);
717#elif defined(_WIN32)
718      ExecutableData = VirtualAlloc(NULL, Size, MEM_COMMIT | MEM_RESERVE,
719                                    PAGE_EXECUTE_READWRITE);
720      EXPECT_NE(nullptr, ExecutableData) << strerror(errno);
721      assert(nullptr != ExecutableData);
722#else
723#error "Platform unsupported"
724#endif
725
726      std::memcpy(ExecutableData, Data, MySize);
727    }
728
729    // We allow AssembledTest to be moved so that we can return objects of
730    // this type.
731    AssembledTest(AssembledTest &&Buffer)
732        : ExecutableData(Buffer.ExecutableData), Size(Buffer.Size) {
733      Buffer.ExecutableData = nullptr;
734      Buffer.Size = 0;
735    }
736
737    AssembledTest &operator=(AssembledTest &&Buffer) {
738      ExecutableData = Buffer.ExecutableData;
739      Buffer.ExecutableData = nullptr;
740      Size = Buffer.Size;
741      Buffer.Size = 0;
742      return *this;
743    }
744
745    ~AssembledTest() {
746      if (ExecutableData != nullptr) {
747#if defined(__unix__)
748        munmap(ExecutableData, Size);
749#elif defined(_WIN32)
750        VirtualFree(ExecutableData, 0, MEM_RELEASE);
751#else
752#error "Platform unsupported"
753#endif
754        ExecutableData = nullptr;
755      }
756    }
757
758    void run() const { reinterpret_cast<void (*)()>(ExecutableData)(); }
759
760#define LegacyRegAccessors(NewName, Name64, Name32, Name16, Name8)             \
761  static_assert(Encoded_GPR_##NewName() == Encoded_GPR_##Name64(),             \
762                "Invalid aliasing.");                                          \
763  uint64_t NewName() const {                                                   \
764    return contentsOfQword(AssembledTest::Name64##Slot());                     \
765  }                                                                            \
766  static_assert(Encoded_GPR_##NewName##q() == Encoded_GPR_##Name64(),          \
767                "Invalid aliasing.");                                          \
768  uint64_t NewName##q() const {                                                \
769    return contentsOfQword(AssembledTest::Name64##Slot());                     \
770  }                                                                            \
771  static_assert(Encoded_GPR_##NewName##d() == Encoded_GPR_##Name64(),          \
772                "Invalid aliasing.");                                          \
773  uint32_t NewName##d() const {                                                \
774    return contentsOfQword(AssembledTest::Name64##Slot());                     \
775  }                                                                            \
776  static_assert(Encoded_GPR_##NewName##w() == Encoded_GPR_##Name64(),          \
777                "Invalid aliasing.");                                          \
778  uint16_t NewName##w() const {                                                \
779    return contentsOfQword(AssembledTest::Name64##Slot());                     \
780  }                                                                            \
781  static_assert(Encoded_GPR_##NewName##l() == Encoded_GPR_##Name64(),          \
782                "Invalid aliasing.");                                          \
783  uint8_t NewName##l() const {                                                 \
784    return contentsOfQword(AssembledTest::Name64##Slot());                     \
785  }                                                                            \
786  static_assert(Encoded_GPR_##Name64() == Encoded_GPR_##Name64(),              \
787                "Invalid aliasing.");                                          \
788  uint64_t Name64() const {                                                    \
789    return contentsOfQword(AssembledTest::Name64##Slot());                     \
790  }                                                                            \
791  static_assert(Encoded_GPR_##Name32() == Encoded_GPR_##Name64(),              \
792                "Invalid aliasing.");                                          \
793  uint32_t Name32() const {                                                    \
794    return contentsOfQword(AssembledTest::Name64##Slot());                     \
795  }                                                                            \
796  static_assert(Encoded_GPR_##Name16() == Encoded_GPR_##Name64(),              \
797                "Invalid aliasing.");                                          \
798  uint16_t Name16() const {                                                    \
799    return contentsOfQword(AssembledTest::Name64##Slot());                     \
800  }                                                                            \
801  static_assert(Encoded_GPR_##Name8() == Encoded_GPR_##Name64(),               \
802                "Invalid aliasing.");                                          \
803  uint8_t Name8() const {                                                      \
804    return contentsOfQword(AssembledTest::Name64##Slot());                     \
805  }
806#define NewRegAccessors(NewName)                                               \
807  uint64_t NewName() const {                                                   \
808    return contentsOfQword(AssembledTest::NewName##Slot());                    \
809  }                                                                            \
810  uint64_t NewName##q() const {                                                \
811    return contentsOfQword(AssembledTest::NewName##Slot());                    \
812  }                                                                            \
813  uint32_t NewName##d() const {                                                \
814    return contentsOfQword(AssembledTest::NewName##Slot());                    \
815  }                                                                            \
816  uint16_t NewName##w() const {                                                \
817    return contentsOfQword(AssembledTest::NewName##Slot());                    \
818  }                                                                            \
819  uint8_t NewName##l() const {                                                 \
820    return contentsOfQword(AssembledTest::NewName##Slot());                    \
821  }
822#define XmmRegAccessor(Name)                                                   \
823  template <typename T> T Name() const {                                       \
824    return xmm<T>(AssembledTest::Name##Slot());                                \
825  }
826    LegacyRegAccessors(r0, rsp, esp, sp, spl);
827    LegacyRegAccessors(r1, rax, eax, ax, al);
828    LegacyRegAccessors(r2, rbx, ebx, bx, bl);
829    LegacyRegAccessors(r3, rcx, ecx, cx, cl);
830    LegacyRegAccessors(r4, rdx, edx, dx, dl);
831    LegacyRegAccessors(r5, rbp, ebp, bp, bpl);
832    LegacyRegAccessors(r6, rsi, esi, si, sil);
833    LegacyRegAccessors(r7, rdi, edi, di, dil);
834    NewRegAccessors(r8);
835    NewRegAccessors(r9);
836    NewRegAccessors(r10);
837    NewRegAccessors(r11);
838    NewRegAccessors(r12);
839    NewRegAccessors(r13);
840    NewRegAccessors(r14);
841    NewRegAccessors(r15);
842    XmmRegAccessor(xmm0);
843    XmmRegAccessor(xmm1);
844    XmmRegAccessor(xmm2);
845    XmmRegAccessor(xmm3);
846    XmmRegAccessor(xmm4);
847    XmmRegAccessor(xmm5);
848    XmmRegAccessor(xmm6);
849    XmmRegAccessor(xmm7);
850    XmmRegAccessor(xmm8);
851    XmmRegAccessor(xmm9);
852    XmmRegAccessor(xmm10);
853    XmmRegAccessor(xmm11);
854    XmmRegAccessor(xmm12);
855    XmmRegAccessor(xmm13);
856    XmmRegAccessor(xmm14);
857    XmmRegAccessor(xmm15);
858#undef XmmRegAccessor
859#undef NewRegAccessors
860#undef LegacyRegAccessors
861
862    // contentsOfDword is used for reading the values in the scratchpad area.
863    // Valid arguments are the dword ids returned by
864    // AssemblerX8664Test::allocateDword() -- other inputs are considered
865    // invalid, and are not guaranteed to work if the implementation changes.
866    template <typename T = uint32_t, typename = typename std::enable_if<
867                                         sizeof(T) == sizeof(uint32_t)>::type>
868    T contentsOfDword(uint32_t Dword) const {
869      return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) +
870                                    dwordOffset(Dword));
871    }
872
873    template <typename T = uint64_t, typename = typename std::enable_if<
874                                         sizeof(T) == sizeof(uint64_t)>::type>
875    T contentsOfQword(uint32_t InitialDword) const {
876      return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) +
877                                    dwordOffset(InitialDword));
878    }
879
880    Dqword contentsOfDqword(uint32_t InitialDword) const {
881      return *reinterpret_cast<Dqword *>(
882                 static_cast<uint8_t *>(ExecutableData) +
883                 dwordOffset(InitialDword));
884    }
885
886    template <typename T = uint32_t, typename = typename std::enable_if<
887                                         sizeof(T) == sizeof(uint32_t)>::type>
888    void setDwordTo(uint32_t Dword, T value) {
889      *reinterpret_cast<uint32_t *>(static_cast<uint8_t *>(ExecutableData) +
890                                    dwordOffset(Dword)) =
891          *reinterpret_cast<uint32_t *>(&value);
892    }
893
894    template <typename T = uint64_t, typename = typename std::enable_if<
895                                         sizeof(T) == sizeof(uint64_t)>::type>
896    void setQwordTo(uint32_t InitialDword, T value) {
897      *reinterpret_cast<uint64_t *>(static_cast<uint8_t *>(ExecutableData) +
898                                    dwordOffset(InitialDword)) =
899          *reinterpret_cast<uint64_t *>(&value);
900    }
901
902    void setDqwordTo(uint32_t InitialDword, const Dqword &qdword) {
903      setQwordTo(InitialDword, qdword.U64[0]);
904      setQwordTo(InitialDword + 2, qdword.U64[1]);
905    }
906
907  private:
908    template <typename T>
909    typename std::enable_if<std::is_same<T, Dqword>::value, Dqword>::type
910    xmm(uint8_t Slot) const {
911      return contentsOfDqword(Slot);
912    }
913
914    template <typename T>
915    typename std::enable_if<!std::is_same<T, Dqword>::value, T>::type
916    xmm(uint8_t Slot) const {
917      constexpr bool TIs64Bit = sizeof(T) == sizeof(uint64_t);
918      using _64BitType = typename std::conditional<TIs64Bit, T, uint64_t>::type;
919      using _32BitType = typename std::conditional<TIs64Bit, uint32_t, T>::type;
920      if (TIs64Bit) {
921        return contentsOfQword<_64BitType>(Slot);
922      }
923      return contentsOfDword<_32BitType>(Slot);
924    }
925
926    static uint32_t dwordOffset(uint32_t Index) {
927      return MaximumCodeSize + (Index * 4);
928    }
929
930    void *ExecutableData = nullptr;
931    size_t Size;
932  };
933
934  // assemble created an AssembledTest with the jitted code. The first time
935  // assemble is executed it will add the epilogue to the jitted code (which is
936  // the reason why this method is not const qualified.
937  AssembledTest assemble() {
938    if (NeedsEpilogue) {
939      addEpilogue();
940    }
941    NeedsEpilogue = false;
942
943    for (const auto *Fixup : assembler()->fixups()) {
944      Fixup->emitOffset(assembler());
945    }
946
947    return AssembledTest(codeBytes(), codeBytesSize(), NumAllocatedDwords);
948  }
949
950  // Allocates a new dword slot in the test's scratchpad area.
951  uint32_t allocateDword() { return NumAllocatedDwords++; }
952
953  // Allocates a new qword slot in the test's scratchpad area.
954  uint32_t allocateQword() {
955    uint32_t InitialDword = allocateDword();
956    allocateDword();
957    return InitialDword;
958  }
959
960  // Allocates a new dqword slot in the test's scratchpad area.
961  uint32_t allocateDqword() {
962    uint32_t InitialDword = allocateQword();
963    allocateQword();
964    return InitialDword;
965  }
966
967  Address dwordAddress(uint32_t Dword) {
968    return Address(Encoded_GPR_r9(), dwordDisp(Dword), nullptr);
969  }
970
971private:
972  // e??SlotAddress returns an AssemblerX8664::Traits::Address that can be used
973  // by the test cases to encode an address operand for accessing the slot for
974  // the specified register. These are all private for, when jitting the test
975  // code, tests should not tamper with these values. Besides, during the test
976  // execution these slots' contents are undefined and should not be accessed.
977  Address raxSlotAddress() { return dwordAddress(AssembledTest::raxSlot()); }
978  Address rbxSlotAddress() { return dwordAddress(AssembledTest::rbxSlot()); }
979  Address rcxSlotAddress() { return dwordAddress(AssembledTest::rcxSlot()); }
980  Address rdxSlotAddress() { return dwordAddress(AssembledTest::rdxSlot()); }
981  Address rdiSlotAddress() { return dwordAddress(AssembledTest::rdiSlot()); }
982  Address rsiSlotAddress() { return dwordAddress(AssembledTest::rsiSlot()); }
983  Address rbpSlotAddress() { return dwordAddress(AssembledTest::rbpSlot()); }
984  Address rspSlotAddress() { return dwordAddress(AssembledTest::rspSlot()); }
985  Address r8SlotAddress() { return dwordAddress(AssembledTest::r8Slot()); }
986  Address r9SlotAddress() { return dwordAddress(AssembledTest::r9Slot()); }
987  Address r10SlotAddress() { return dwordAddress(AssembledTest::r10Slot()); }
988  Address r11SlotAddress() { return dwordAddress(AssembledTest::r11Slot()); }
989  Address r12SlotAddress() { return dwordAddress(AssembledTest::r12Slot()); }
990  Address r13SlotAddress() { return dwordAddress(AssembledTest::r13Slot()); }
991  Address r14SlotAddress() { return dwordAddress(AssembledTest::r14Slot()); }
992  Address r15SlotAddress() { return dwordAddress(AssembledTest::r15Slot()); }
993  Address xmm0SlotAddress() { return dwordAddress(AssembledTest::xmm0Slot()); }
994  Address xmm1SlotAddress() { return dwordAddress(AssembledTest::xmm1Slot()); }
995  Address xmm2SlotAddress() { return dwordAddress(AssembledTest::xmm2Slot()); }
996  Address xmm3SlotAddress() { return dwordAddress(AssembledTest::xmm3Slot()); }
997  Address xmm4SlotAddress() { return dwordAddress(AssembledTest::xmm4Slot()); }
998  Address xmm5SlotAddress() { return dwordAddress(AssembledTest::xmm5Slot()); }
999  Address xmm6SlotAddress() { return dwordAddress(AssembledTest::xmm6Slot()); }
1000  Address xmm7SlotAddress() { return dwordAddress(AssembledTest::xmm7Slot()); }
1001  Address xmm8SlotAddress() { return dwordAddress(AssembledTest::xmm8Slot()); }
1002  Address xmm9SlotAddress() { return dwordAddress(AssembledTest::xmm9Slot()); }
1003  Address xmm10SlotAddress() {
1004    return dwordAddress(AssembledTest::xmm10Slot());
1005  }
1006  Address xmm11SlotAddress() {
1007    return dwordAddress(AssembledTest::xmm11Slot());
1008  }
1009  Address xmm12SlotAddress() {
1010    return dwordAddress(AssembledTest::xmm12Slot());
1011  }
1012  Address xmm13SlotAddress() {
1013    return dwordAddress(AssembledTest::xmm13Slot());
1014  }
1015  Address xmm14SlotAddress() {
1016    return dwordAddress(AssembledTest::xmm14Slot());
1017  }
1018  Address xmm15SlotAddress() {
1019    return dwordAddress(AssembledTest::xmm15Slot());
1020  }
1021
1022  // Returns the displacement that should be used when accessing the specified
1023  // Dword in the scratchpad area. It needs to adjust for the initial
1024  // instructions that are emitted before the call that materializes the IP
1025  // register.
1026  uint32_t dwordDisp(uint32_t Dword) const {
1027    EXPECT_LT(Dword, NumAllocatedDwords);
1028    assert(Dword < NumAllocatedDwords);
1029    static constexpr uint8_t PushR9Bytes = 2;
1030    static constexpr uint8_t CallImmBytes = 5;
1031    return AssembledTest::MaximumCodeSize + (Dword * 4) -
1032           (PushR9Bytes + CallImmBytes);
1033  }
1034
1035  void addPrologue() {
1036    __ pushl(Encoded_GPR_r9());
1037    __ call(Immediate(4));
1038    __ popl(Encoded_GPR_r9());
1039
1040    __ pushl(Encoded_GPR_rax());
1041    __ pushl(Encoded_GPR_rbx());
1042    __ pushl(Encoded_GPR_rcx());
1043    __ pushl(Encoded_GPR_rdx());
1044    __ pushl(Encoded_GPR_rbp());
1045    __ pushl(Encoded_GPR_rdi());
1046    __ pushl(Encoded_GPR_rsi());
1047    __ pushl(Encoded_GPR_r8());
1048    __ pushl(Encoded_GPR_r10());
1049    __ pushl(Encoded_GPR_r11());
1050    __ pushl(Encoded_GPR_r12());
1051    __ pushl(Encoded_GPR_r13());
1052    __ pushl(Encoded_GPR_r14());
1053    __ pushl(Encoded_GPR_r15());
1054
1055    __ mov(IceType_i32, Encoded_GPR_rax(), Immediate(0x00));
1056    __ mov(IceType_i32, Encoded_GPR_rbx(), Immediate(0x00));
1057    __ mov(IceType_i32, Encoded_GPR_rcx(), Immediate(0x00));
1058    __ mov(IceType_i32, Encoded_GPR_rdx(), Immediate(0x00));
1059    __ mov(IceType_i32, Encoded_GPR_rbp(), Immediate(0x00));
1060    __ mov(IceType_i32, Encoded_GPR_rdi(), Immediate(0x00));
1061    __ mov(IceType_i32, Encoded_GPR_rsi(), Immediate(0x00));
1062    __ mov(IceType_i32, Encoded_GPR_r8(), Immediate(0x00));
1063    __ mov(IceType_i32, Encoded_GPR_r10(), Immediate(0x00));
1064    __ mov(IceType_i32, Encoded_GPR_r11(), Immediate(0x00));
1065    __ mov(IceType_i32, Encoded_GPR_r12(), Immediate(0x00));
1066    __ mov(IceType_i32, Encoded_GPR_r13(), Immediate(0x00));
1067    __ mov(IceType_i32, Encoded_GPR_r14(), Immediate(0x00));
1068    __ mov(IceType_i32, Encoded_GPR_r15(), Immediate(0x00));
1069  }
1070
1071  void addEpilogue() {
1072    __ mov(IceType_i64, raxSlotAddress(), Encoded_GPR_rax());
1073    __ mov(IceType_i64, rbxSlotAddress(), Encoded_GPR_rbx());
1074    __ mov(IceType_i64, rcxSlotAddress(), Encoded_GPR_rcx());
1075    __ mov(IceType_i64, rdxSlotAddress(), Encoded_GPR_rdx());
1076    __ mov(IceType_i64, rdiSlotAddress(), Encoded_GPR_rdi());
1077    __ mov(IceType_i64, rsiSlotAddress(), Encoded_GPR_rsi());
1078    __ mov(IceType_i64, rbpSlotAddress(), Encoded_GPR_rbp());
1079    __ mov(IceType_i64, rspSlotAddress(), Encoded_GPR_rsp());
1080    __ mov(IceType_i64, r8SlotAddress(), Encoded_GPR_r8());
1081    __ mov(IceType_i64, r9SlotAddress(), Encoded_GPR_r9());
1082    __ mov(IceType_i64, r10SlotAddress(), Encoded_GPR_r10());
1083    __ mov(IceType_i64, r11SlotAddress(), Encoded_GPR_r11());
1084    __ mov(IceType_i64, r12SlotAddress(), Encoded_GPR_r12());
1085    __ mov(IceType_i64, r13SlotAddress(), Encoded_GPR_r13());
1086    __ mov(IceType_i64, r14SlotAddress(), Encoded_GPR_r14());
1087    __ mov(IceType_i64, r15SlotAddress(), Encoded_GPR_r15());
1088    __ movups(xmm0SlotAddress(), Encoded_Xmm_xmm0());
1089    __ movups(xmm1SlotAddress(), Encoded_Xmm_xmm1());
1090    __ movups(xmm2SlotAddress(), Encoded_Xmm_xmm2());
1091    __ movups(xmm3SlotAddress(), Encoded_Xmm_xmm3());
1092    __ movups(xmm4SlotAddress(), Encoded_Xmm_xmm4());
1093    __ movups(xmm5SlotAddress(), Encoded_Xmm_xmm5());
1094    __ movups(xmm6SlotAddress(), Encoded_Xmm_xmm6());
1095    __ movups(xmm7SlotAddress(), Encoded_Xmm_xmm7());
1096    __ movups(xmm8SlotAddress(), Encoded_Xmm_xmm8());
1097    __ movups(xmm9SlotAddress(), Encoded_Xmm_xmm9());
1098    __ movups(xmm10SlotAddress(), Encoded_Xmm_xmm10());
1099    __ movups(xmm11SlotAddress(), Encoded_Xmm_xmm11());
1100    __ movups(xmm12SlotAddress(), Encoded_Xmm_xmm12());
1101    __ movups(xmm13SlotAddress(), Encoded_Xmm_xmm13());
1102    __ movups(xmm14SlotAddress(), Encoded_Xmm_xmm14());
1103    __ movups(xmm15SlotAddress(), Encoded_Xmm_xmm15());
1104
1105    __ popl(Encoded_GPR_r15());
1106    __ popl(Encoded_GPR_r14());
1107    __ popl(Encoded_GPR_r13());
1108    __ popl(Encoded_GPR_r12());
1109    __ popl(Encoded_GPR_r11());
1110    __ popl(Encoded_GPR_r10());
1111    __ popl(Encoded_GPR_r8());
1112    __ popl(Encoded_GPR_rsi());
1113    __ popl(Encoded_GPR_rdi());
1114    __ popl(Encoded_GPR_rbp());
1115    __ popl(Encoded_GPR_rdx());
1116    __ popl(Encoded_GPR_rcx());
1117    __ popl(Encoded_GPR_rbx());
1118    __ popl(Encoded_GPR_rax());
1119    __ popl(Encoded_GPR_r9());
1120
1121    __ ret();
1122  }
1123
  // True while the epilogue has not been added to the jitted code yet;
  // assemble() adds the epilogue and clears this flag.
  bool NeedsEpilogue;
  // Number of scratchpad dwords allocated so far. It starts at
  // AssembledTest::ScratchpadSlots (the slots used for saving the registers),
  // grows with each allocateDword() call, and is the number of scratchpad
  // dwords assemble() requests for the AssembledTest.
  uint32_t NumAllocatedDwords;
1126};
1127
1128} // end of namespace Test
1129} // end of namespace X8664
1130} // end of namespace Ice
1131
1132#endif // ASSEMBLERX8664_TESTUTIL_H_
1133