//===- subzero/unittest/AssemblerX8664/XmmArith.cpp -----------------------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9#include "AssemblerX8664/TestUtil.h"
10
11namespace Ice {
12namespace X8664 {
13namespace Test {
14namespace {
15
16TEST_F(AssemblerX8664Test, ArithSS) {
17#define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op)       \
18  do {                                                                         \
19    static_assert(FloatSize == 32 || FloatSize == 64,                          \
20                  "Invalid fp size " #FloatSize);                              \
21    static constexpr char TestString[] =                                       \
22        "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1           \
23        ", " #Inst ", " #Op ")";                                               \
24    static constexpr bool IsDouble = FloatSize == 64;                          \
25    using Type = std::conditional<IsDouble, double, float>::type;              \
26    const uint32_t T0 = allocateQword();                                       \
27    const Type V0 = Value0;                                                    \
28    const uint32_t T1 = allocateQword();                                       \
29    const Type V1 = Value1;                                                    \
30                                                                               \
31    __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0));     \
32    __ movss(IceType_f##FloatSize, Encoded_Xmm_##Src(), dwordAddress(T1));     \
33    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
34                                                                               \
35    AssembledTest test = assemble();                                           \
36    if (IsDouble) {                                                            \
37      test.setQwordTo(T0, static_cast<double>(V0));                            \
38      test.setQwordTo(T1, static_cast<double>(V1));                            \
39    } else {                                                                   \
40      test.setDwordTo(T0, static_cast<float>(V0));                             \
41      test.setDwordTo(T1, static_cast<float>(V1));                             \
42    }                                                                          \
43                                                                               \
44    test.run();                                                                \
45                                                                               \
46    ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
47    reset();                                                                   \
48  } while (0)
49
50#define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op)           \
51  do {                                                                         \
52    static_assert(FloatSize == 32 || FloatSize == 64,                          \
53                  "Invalid fp size " #FloatSize);                              \
54    static constexpr char TestString[] =                                       \
55        "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst    \
56        ", " #Op ")";                                                          \
57    static constexpr bool IsDouble = FloatSize == 64;                          \
58    using Type = std::conditional<IsDouble, double, float>::type;              \
59    const uint32_t T0 = allocateQword();                                       \
60    const Type V0 = Value0;                                                    \
61    const uint32_t T1 = allocateQword();                                       \
62    const Type V1 = Value1;                                                    \
63                                                                               \
64    __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0));     \
65    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1));      \
66                                                                               \
67    AssembledTest test = assemble();                                           \
68    if (IsDouble) {                                                            \
69      test.setQwordTo(T0, static_cast<double>(V0));                            \
70      test.setQwordTo(T1, static_cast<double>(V1));                            \
71    } else {                                                                   \
72      test.setDwordTo(T0, static_cast<float>(V0));                             \
73      test.setDwordTo(T1, static_cast<float>(V1));                             \
74    }                                                                          \
75                                                                               \
76    test.run();                                                                \
77                                                                               \
78    ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
79    reset();                                                                   \
80  } while (0)
81
82#define TestArithSS(FloatSize, Src, Dst0, Dst1)                                \
83  do {                                                                         \
84    TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +);              \
85    TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +);                  \
86    TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -);              \
87    TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -);                  \
88    TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *);              \
89    TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *);                  \
90    TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / );             \
91    TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / );                 \
92  } while (0)
93
94#define TestImpl(Src, Dst0, Dst1)                                              \
95  do {                                                                         \
96    TestArithSS(32, Src, Dst0, Dst1);                                          \
97    TestArithSS(64, Src, Dst0, Dst1);                                          \
98  } while (0)
99
100  TestImpl(xmm0, xmm1, xmm2);
101  TestImpl(xmm1, xmm2, xmm3);
102  TestImpl(xmm2, xmm3, xmm4);
103  TestImpl(xmm3, xmm4, xmm5);
104  TestImpl(xmm4, xmm5, xmm6);
105  TestImpl(xmm5, xmm6, xmm7);
106  TestImpl(xmm6, xmm7, xmm8);
107  TestImpl(xmm7, xmm8, xmm9);
108  TestImpl(xmm8, xmm9, xmm10);
109  TestImpl(xmm9, xmm10, xmm11);
110  TestImpl(xmm10, xmm11, xmm12);
111  TestImpl(xmm11, xmm12, xmm13);
112  TestImpl(xmm12, xmm13, xmm14);
113  TestImpl(xmm13, xmm14, xmm15);
114  TestImpl(xmm14, xmm15, xmm0);
115  TestImpl(xmm15, xmm0, xmm1);
116
117#undef TestImpl
118#undef TestArithSS
119#undef TestArithSSXmmAddr
120#undef TestArithSSXmmXmm
121}
122
123TEST_F(AssemblerX8664Test, PArith) {
124#define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size)       \
125  do {                                                                         \
126    static constexpr char TestString[] =                                       \
127        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
128        ", " #Type ", " #Size ")";                                             \
129    const uint32_t T0 = allocateDqword();                                      \
130    const Dqword V0 Value0;                                                    \
131                                                                               \
132    const uint32_t T1 = allocateDqword();                                      \
133    const Dqword V1 Value1;                                                    \
134                                                                               \
135    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
136    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
137    __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
138                                                                               \
139    AssembledTest test = assemble();                                           \
140    test.setDqwordTo(T0, V0);                                                  \
141    test.setDqwordTo(T1, V1);                                                  \
142    test.run();                                                                \
143                                                                               \
144    ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
145        << TestString;                                                         \
146    reset();                                                                   \
147  } while (0)
148
149#define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size)           \
150  do {                                                                         \
151    static constexpr char TestString[] =                                       \
152        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
153        ", " #Type ", " #Size ")";                                             \
154    const uint32_t T0 = allocateDqword();                                      \
155    const Dqword V0 Value0;                                                    \
156                                                                               \
157    const uint32_t T1 = allocateDqword();                                      \
158    const Dqword V1 Value1;                                                    \
159                                                                               \
160    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
161    __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));           \
162                                                                               \
163    AssembledTest test = assemble();                                           \
164    test.setDqwordTo(T0, V0);                                                  \
165    test.setDqwordTo(T1, V1);                                                  \
166    test.run();                                                                \
167                                                                               \
168    ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
169        << TestString;                                                         \
170    reset();                                                                   \
171  } while (0)
172
173#define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size)               \
174  do {                                                                         \
175    static constexpr char TestString[] =                                       \
176        "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type         \
177        ", " #Size ")";                                                        \
178    const uint32_t T0 = allocateDqword();                                      \
179    const Dqword V0 Value0;                                                    \
180                                                                               \
181    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
182    __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Immediate(Imm));             \
183                                                                               \
184    AssembledTest test = assemble();                                           \
185    test.setDqwordTo(T0, V0);                                                  \
186    test.run();                                                                \
187                                                                               \
188    ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>())         \
189        << TestString;                                                         \
190    reset();                                                                   \
191  } while (0)
192
193#define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size)                  \
194  do {                                                                         \
195    static constexpr char TestString[] =                                       \
196        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type         \
197        ", " #Size ")";                                                        \
198    const uint32_t T0 = allocateDqword();                                      \
199    const Dqword V0 Value0;                                                    \
200                                                                               \
201    const uint32_t T1 = allocateDqword();                                      \
202    const Dqword V1 Value1;                                                    \
203                                                                               \
204    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
205    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
206    __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());       \
207                                                                               \
208    AssembledTest test = assemble();                                           \
209    test.setDqwordTo(T0, V0);                                                  \
210    test.setDqwordTo(T1, V1);                                                  \
211    test.run();                                                                \
212                                                                               \
213    ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
214        << TestString;                                                         \
215    reset();                                                                   \
216  } while (0)
217
218#define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size)                      \
219  do {                                                                         \
220    static constexpr char TestString[] =                                       \
221        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size  \
222        ")";                                                                   \
223    const uint32_t T0 = allocateDqword();                                      \
224    const Dqword V0 Value0;                                                    \
225                                                                               \
226    const uint32_t T1 = allocateDqword();                                      \
227    const Dqword V1 Value1;                                                    \
228                                                                               \
229    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
230    __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));          \
231                                                                               \
232    AssembledTest test = assemble();                                           \
233    test.setDqwordTo(T0, V0);                                                  \
234    test.setDqwordTo(T1, V1);                                                  \
235    test.run();                                                                \
236                                                                               \
237    ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
238        << TestString;                                                         \
239    reset();                                                                   \
240  } while (0)
241
242#define TestPArithSize(Dst, Src, Size)                                         \
243  do {                                                                         \
244    static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size.");     \
245    if (Size != 8) {                                                           \
246      TestPArithXmmXmm(                                                        \
247          Dst,                                                                 \
248          (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
249          Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);             \
250      TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
251                              uint64_t(0x8080404002020101ull)),                \
252                        (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);    \
253      TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
254                             uint64_t(0x8080404002020101ull)),                 \
255                       3u, psra, >>, int, Size);                               \
256      TestPArithXmmXmm(                                                        \
257          Dst,                                                                 \
258          (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
259          Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);            \
260      TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
261                              uint64_t(0x8080404002020101ull)),                \
262                        (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);   \
263      TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
264                             uint64_t(0x8080404002020101ull)),                 \
265                       3u, psrl, >>, uint, Size);                              \
266      TestPArithXmmXmm(                                                        \
267          Dst,                                                                 \
268          (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
269          Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);            \
270      TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
271                              uint64_t(0x8080404002020101ull)),                \
272                        (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);   \
273      TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
274                             uint64_t(0x8080404002020101ull)),                 \
275                       3u, psll, <<, uint, Size);                              \
276                                                                               \
277      TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                  \
278                             uint64_t(0x8080404002020101ull)),                 \
279                       Src, (uint64_t(0xFFFFFFFF00000000ull),                  \
280                             uint64_t(0x0123456789ABCDEull)),                  \
281                       pmull, *, int, Size);                                   \
282      TestPArithXmmAddr(                                                       \
283          Dst,                                                                 \
284          (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
285          (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
286          pmull, *, int, Size);                                                \
287      if (Size != 16) {                                                        \
288        TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                \
289                               uint64_t(0x8080404002020101ull)),               \
290                         Src, (uint64_t(0xFFFFFFFF00000000ull),                \
291                               uint64_t(0x0123456789ABCDEull)),                \
292                         pmuludq, *, uint, Size);                              \
293        TestPArithXmmAddr(                                                     \
294            Dst, (uint64_t(0x8040201008040201ull),                             \
295                  uint64_t(0x8080404002020101ull)),                            \
296            (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
297            pmuludq, *, uint, Size);                                           \
298      }                                                                        \
299    }                                                                          \
300    TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
301                           uint64_t(0x8080404002020101ull)),                   \
302                     Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
303                           uint64_t(0x0123456789ABCDEull)),                    \
304                     padd, +, int, Size);                                      \
305    TestPArithXmmAddr(                                                         \
306        Dst,                                                                   \
307        (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
308        (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
309        padd, +, int, Size);                                                   \
310    TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
311                           uint64_t(0x8080404002020101ull)),                   \
312                     Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
313                           uint64_t(0x0123456789ABCDEull)),                    \
314                     psub, -, int, Size);                                      \
315    TestPArithXmmAddr(                                                         \
316        Dst,                                                                   \
317        (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
318        (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
319        psub, -, int, Size);                                                   \
320    TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
321                           uint64_t(0x8080404002020101ull)),                   \
322                     Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
323                           uint64_t(0x0123456789ABCDEull)),                    \
324                     pand, &, int, Size);                                      \
325    TestPArithXmmAddr(                                                         \
326        Dst,                                                                   \
327        (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
328        (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
329        pand, &, int, Size);                                                   \
330                                                                               \
331    TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                     \
332                          uint64_t(0x8080404002020101ull)),                    \
333                    Src, (uint64_t(0xFFFFFFFF00000000ull),                     \
334                          uint64_t(0x0123456789ABCDEull)),                     \
335                    int, Size);                                                \
336    TestPAndnXmmAddr(                                                          \
337        Dst,                                                                   \
338        (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
339        (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
340        int, Size);                                                            \
341                                                                               \
342    TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
343                           uint64_t(0x8080404002020101ull)),                   \
344                     Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
345                           uint64_t(0x0123456789ABCDEull)),                    \
346                     por, |, int, Size);                                       \
347    TestPArithXmmAddr(                                                         \
348        Dst,                                                                   \
349        (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
350        (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
351        por, |, int, Size);                                                    \
352    TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
353                           uint64_t(0x8080404002020101ull)),                   \
354                     Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
355                           uint64_t(0x0123456789ABCDEull)),                    \
356                     pxor, ^, int, Size);                                      \
357    TestPArithXmmAddr(                                                         \
358        Dst,                                                                   \
359        (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
360        (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
361        pxor, ^, int, Size);                                                   \
362  } while (0)
363
364#define TestPArith(Src, Dst)                                                   \
365  do {                                                                         \
366    TestPArithSize(Src, Dst, 8);                                               \
367    TestPArithSize(Src, Dst, 16);                                              \
368    TestPArithSize(Src, Dst, 32);                                              \
369  } while (0)
370
371  TestPArith(xmm0, xmm1);
372  TestPArith(xmm1, xmm2);
373  TestPArith(xmm2, xmm3);
374  TestPArith(xmm3, xmm4);
375  TestPArith(xmm4, xmm5);
376  TestPArith(xmm5, xmm6);
377  TestPArith(xmm6, xmm7);
378  TestPArith(xmm7, xmm8);
379  TestPArith(xmm8, xmm9);
380  TestPArith(xmm9, xmm10);
381  TestPArith(xmm10, xmm11);
382  TestPArith(xmm11, xmm12);
383  TestPArith(xmm12, xmm13);
384  TestPArith(xmm13, xmm14);
385  TestPArith(xmm14, xmm15);
386  TestPArith(xmm15, xmm0);
387
388#undef TestPArith
389#undef TestPArithSize
390#undef TestPAndnXmmAddr
391#undef TestPAndnXmmXmm
392#undef TestPArithXmmImm
393#undef TestPArithXmmAddr
394#undef TestPArithXmmXmm
395}
396
397TEST_F(AssemblerX8664Test, ArithPS) {
398#define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
399  do {                                                                         \
400    static constexpr char TestString[] =                                       \
401        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
402        ", " #Type ")";                                                        \
403    const uint32_t T0 = allocateDqword();                                      \
404    const Dqword V0 Value0;                                                    \
405    const uint32_t T1 = allocateDqword();                                      \
406    const Dqword V1 Value1;                                                    \
407                                                                               \
408    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
409    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
410    __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());            \
411                                                                               \
412    AssembledTest test = assemble();                                           \
413    test.setDqwordTo(T0, V0);                                                  \
414    test.setDqwordTo(T1, V1);                                                  \
415    test.run();                                                                \
416                                                                               \
417    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
418                                                                               \
419    reset();                                                                   \
420  } while (0)
421
422#define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type)     \
423  do {                                                                         \
424    static constexpr char TestString[] =                                       \
425        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
426        ", " #Type ")";                                                        \
427    const uint32_t T0 = allocateDqword();                                      \
428    const Dqword V0 Value0;                                                    \
429    const uint32_t T1 = allocateDqword();                                      \
430    const Dqword V1 Value1;                                                    \
431                                                                               \
432    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
433    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
434    __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());                         \
435                                                                               \
436    AssembledTest test = assemble();                                           \
437    test.setDqwordTo(T0, V0);                                                  \
438    test.setDqwordTo(T1, V1);                                                  \
439    test.run();                                                                \
440                                                                               \
441    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
442                                                                               \
443    reset();                                                                   \
444  } while (0)
445
// Register/memory check for a packed instruction whose assembler method takes
// no IceType argument. Dst is preloaded with Value0, Value1 is left in memory,
// and the register is compared with `packedAs<Type>(Value0) Op Value1`.
//   Dst             XMM register that is preloaded and read back.
//   Value0, Value1  parenthesized Dqword initializers.
//   Inst            assembler method under test.
//   Op              C++ operator that produces the expected value.
//   Type            lane type handed to packedAs<>.
#define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type)         \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op ", "      \
        #Type ")";                                                             \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
    const uint32_t Slot0 = allocateDqword();                                   \
    const uint32_t Slot1 = allocateDqword();                                   \
                                                                               \
    /* Only Dst is preloaded; Inst reads its second operand from memory. */    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(Slot0));                       \
    __ Inst(Encoded_Xmm_##Dst(), dwordAddress(Slot1));                         \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Slot0, V0);                                               \
    test.setDqwordTo(Slot1, V1);                                               \
    test.run();                                                                \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    reset();                                                                   \
  } while (0)
468
// Register/register check for a packed instruction that is modelled by a
// packedAs<Type> member of the same name. `Inst` is used twice: as the
// assembler method (called with IceType_f<FloatSize>) and as the member that
// computes the expected value from Value0 and Value1.
#define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type)          \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", "           \
        #Type ")";                                                             \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
    const uint32_t Slot0 = allocateDqword();                                   \
    const uint32_t Slot1 = allocateDqword();                                   \
                                                                               \
    /* Preload both registers, then apply Inst to (Dst, Src). */               \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(Slot0));                       \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(Slot1));                       \
    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Slot0, V0);                                               \
    test.setDqwordTo(Slot1, V1);                                               \
    test.run();                                                                \
    ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString;  \
    reset();                                                                   \
  } while (0)
492
// Register/memory check for a typed packed instruction. Dst is preloaded with
// Value0, Value1 is left in memory, Inst is called with IceType_f<FloatSize>,
// and the register is compared with `packedAs<Type>(Value0) Op Value1`.
#define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type)     \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op ", "      \
        #Type ")";                                                             \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
    const uint32_t Slot0 = allocateDqword();                                   \
    const uint32_t Slot1 = allocateDqword();                                   \
                                                                               \
    /* Only Dst is preloaded; Inst reads its second operand from memory. */    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(Slot0));                       \
    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(Slot1));   \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Slot0, V0);                                               \
    test.setDqwordTo(Slot1, V1);                                               \
    test.run();                                                                \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    reset();                                                                   \
  } while (0)
515
516#define TestArithPS(Dst, Src)                                                  \
517  do {                                                                         \
518    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
519                      (0.55, 0.43, 0.23, 1.21), addps, +, float);              \
520    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
521                       (0.55, 0.43, 0.23, 1.21), addps, +, float);             \
522    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
523                      (0.55, 0.43, 0.23, 1.21), subps, -, float);              \
524    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
525                       (0.55, 0.43, 0.23, 1.21), subps, -, float);             \
526    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
527                      (0.55, 0.43, 0.23, 1.21), mulps, *, float);              \
528    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
529                       (0.55, 0.43, 0.23, 1.21), mulps, *, float);             \
530    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
531                      (0.55, 0.43, 0.23, 1.21), divps, /, float);              \
532    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
533                       (0.55, 0.43, 0.23, 1.21), divps, /, float);             \
534    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
535                      (0.55, 0.43, 0.23, 1.21), andps, &, float);              \
536    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
537                       (0.55, 0.43, 0.23, 1.21), andps, &, float);             \
538    TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &,    \
539                      double);                                                 \
540    TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &,        \
541                       double);                                                \
542    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
543                      (0.55, 0.43, 0.23, 1.21), orps, |, float);               \
544    TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |,     \
545                      double);                                                 \
546    TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
547                 (0.55, 0.43, 0.23, 1.21), minps, float);                      \
548    TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
549                 (0.55, 0.43, 0.23, 1.21), maxps, float);                      \
550    TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double);   \
551    TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double);   \
552    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
553                      (0.55, 0.43, 0.23, 1.21), xorps, ^, float);              \
554    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
555                       (0.55, 0.43, 0.23, 1.21), xorps, ^, float);             \
556    TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^,    \
557                      double);                                                 \
558    TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^,        \
559                       double);                                                \
560  } while (0)
561
562  TestArithPS(xmm0, xmm1);
563  TestArithPS(xmm1, xmm2);
564  TestArithPS(xmm2, xmm3);
565  TestArithPS(xmm3, xmm4);
566  TestArithPS(xmm4, xmm5);
567  TestArithPS(xmm5, xmm6);
568  TestArithPS(xmm6, xmm7);
569  TestArithPS(xmm7, xmm8);
570  TestArithPS(xmm8, xmm9);
571  TestArithPS(xmm9, xmm10);
572  TestArithPS(xmm10, xmm11);
573  TestArithPS(xmm11, xmm12);
574  TestArithPS(xmm12, xmm13);
575  TestArithPS(xmm13, xmm14);
576  TestArithPS(xmm14, xmm15);
577  TestArithPS(xmm15, xmm0);
578
579#undef TestArithPs
580#undef TestMinMaxPS
581#undef TestArithPSXmmXmmUntyped
582#undef TestArithPSXmmAddr
583#undef TestArithPSXmmXmm
584}
585
586TEST_F(AssemblerX8664Test, Blending) {
587  using f32 = float;
588  using i8 = uint8_t;
589
590#define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type)    \
591  do {                                                                         \
592    static constexpr char TestString[] =                                       \
593        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst        \
594        ", " #Type ")";                                                        \
595    const uint32_t T0 = allocateDqword();                                      \
596    const Dqword V0 Value0;                                                    \
597    const uint32_t T1 = allocateDqword();                                      \
598    const Dqword V1 Value1;                                                    \
599    const uint32_t Mask = allocateDqword();                                    \
600    const Dqword MaskValue M;                                                  \
601                                                                               \
602    __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask));                         \
603    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
604    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
605    __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());         \
606                                                                               \
607    AssembledTest test = assemble();                                           \
608    test.setDqwordTo(T0, V0);                                                  \
609    test.setDqwordTo(T1, V1);                                                  \
610    test.setDqwordTo(Mask, MaskValue);                                         \
611    test.run();                                                                \
612                                                                               \
613    ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
614        << TestString;                                                         \
615    reset();                                                                   \
616  } while (0)
617
618#define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type)        \
619  do {                                                                         \
620    static constexpr char TestString[] =                                       \
621        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
622        ")";                                                                   \
623    const uint32_t T0 = allocateDqword();                                      \
624    const Dqword V0 Value0;                                                    \
625    const uint32_t T1 = allocateDqword();                                      \
626    const Dqword V1 Value1;                                                    \
627    const uint32_t Mask = allocateDqword();                                    \
628    const Dqword MaskValue M;                                                  \
629                                                                               \
630    __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask));                         \
631    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
632    __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), dwordAddress(T1));            \
633                                                                               \
634    AssembledTest test = assemble();                                           \
635    test.setDqwordTo(T0, V0);                                                  \
636    test.setDqwordTo(T1, V1);                                                  \
637    test.setDqwordTo(Mask, MaskValue);                                         \
638    test.run();                                                                \
639                                                                               \
640    ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
641        << TestString;                                                         \
642    reset();                                                                   \
643  } while (0)
644
645#define TestBlending(Src, Dst)                                                 \
646  do {                                                                         \
647    TestBlendingXmmXmm(                                                        \
648        Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0),              \
649        (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
650        blendvps, f32);                                                        \
651    TestBlendingXmmAddr(                                                       \
652        Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0),                   \
653        (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
654        blendvps, f32);                                                        \
655    TestBlendingXmmXmm(                                                        \
656        Dst,                                                                   \
657        (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
658        Src,                                                                   \
659        (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
660        (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
661        pblendvb, i8);                                                         \
662    TestBlendingXmmAddr(                                                       \
663        Dst,                                                                   \
664        (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
665        (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
666        (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
667        pblendvb, i8);                                                         \
668  } while (0)
669
670  /* xmm0 is taken. It is the implicit mask . */
671  TestBlending(xmm1, xmm2);
672  TestBlending(xmm2, xmm3);
673  TestBlending(xmm3, xmm4);
674  TestBlending(xmm4, xmm5);
675  TestBlending(xmm5, xmm6);
676  TestBlending(xmm6, xmm7);
677  TestBlending(xmm7, xmm8);
678  TestBlending(xmm8, xmm9);
679  TestBlending(xmm9, xmm10);
680  TestBlending(xmm10, xmm11);
681  TestBlending(xmm11, xmm12);
682  TestBlending(xmm12, xmm13);
683  TestBlending(xmm13, xmm14);
684  TestBlending(xmm14, xmm15);
685  TestBlending(xmm15, xmm1);
686
687#undef TestBlending
688#undef TestBlendingXmmAddr
689#undef TestBlendingXmmXmm
690}
691
692TEST_F(AssemblerX8664Test, Cmpps) {
693#define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type)      \
694  do {                                                                         \
695    static constexpr char TestString[] =                                       \
696        "(" #Src ", " #Dst ", " #C ", " #Op ")";                               \
697    const uint32_t T0 = allocateDqword();                                      \
698    const Dqword V0 Value0;                                                    \
699    const uint32_t T1 = allocateDqword();                                      \
700    const Dqword V1 Value1;                                                    \
701                                                                               \
702    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
703    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
704    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),   \
705             Cond::Cmpps_##C);                                                 \
706                                                                               \
707    AssembledTest test = assemble();                                           \
708    test.setDqwordTo(T0, V0);                                                  \
709    test.setDqwordTo(T1, V1);                                                  \
710    test.run();                                                                \
711                                                                               \
712    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
713    ;                                                                          \
714    reset();                                                                   \
715  } while (0)
716
717#define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type)          \
718  do {                                                                         \
719    static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")";  \
720    const uint32_t T0 = allocateDqword();                                      \
721    const Dqword V0 Value0;                                                    \
722    const uint32_t T1 = allocateDqword();                                      \
723    const Dqword V1 Value1;                                                    \
724                                                                               \
725    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
726    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1),      \
727             Cond::Cmpps_##C);                                                 \
728                                                                               \
729    AssembledTest test = assemble();                                           \
730    test.setDqwordTo(T0, V0);                                                  \
731    test.setDqwordTo(T1, V1);                                                  \
732    test.run();                                                                \
733                                                                               \
734    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
735    ;                                                                          \
736    reset();                                                                   \
737  } while (0)
738
739#define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type)  \
740  do {                                                                         \
741    static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")";       \
742    const uint32_t T0 = allocateDqword();                                      \
743    const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
744                    std::numeric_limits<float>::quiet_NaN());                  \
745    const uint32_t T1 = allocateDqword();                                      \
746    const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
747                    std::numeric_limits<float>::quiet_NaN());                  \
748                                                                               \
749    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
750    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
751    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),   \
752             Cond::Cmpps_##C);                                                 \
753                                                                               \
754    AssembledTest test = assemble();                                           \
755    test.setDqwordTo(T0, V0);                                                  \
756    test.setDqwordTo(T1, V1);                                                  \
757    test.run();                                                                \
758                                                                               \
759    ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
760    ;                                                                          \
761    reset();                                                                   \
762  } while (0)
763
764#define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type)      \
765  do {                                                                         \
766    static constexpr char TestString[] = "(" #Dst ", " #C ")";                 \
767    const uint32_t T0 = allocateDqword();                                      \
768    const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
769                    std::numeric_limits<float>::quiet_NaN());                  \
770    const uint32_t T1 = allocateDqword();                                      \
771    const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
772                    std::numeric_limits<float>::quiet_NaN());                  \
773                                                                               \
774    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
775    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1),      \
776             Cond::Cmpps_##C);                                                 \
777                                                                               \
778    AssembledTest test = assemble();                                           \
779    test.setDqwordTo(T0, V0);                                                  \
780    test.setDqwordTo(T1, V1);                                                  \
781    test.run();                                                                \
782                                                                               \
783    ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
784    ;                                                                          \
785    reset();                                                                   \
786  } while (0)
787
788#define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type)                   \
789  do {                                                                         \
790    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
791    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
792    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
793    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
794    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
795    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
796    TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \
797    TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type);     \
798    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
799    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
800    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
801    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
802    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
803    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
804    if (FloatSize == 32) {                                                     \
805      TestCmppsOrdUnordXmmXmm(                                                 \
806          32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
807                    std::numeric_limits<float>::quiet_NaN()),                  \
808          Src, (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,             \
809                std::numeric_limits<float>::quiet_NaN()),                      \
810          unord, Type);                                                        \
811      TestCmppsOrdUnordXmmAddr(                                                \
812          32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
813                    std::numeric_limits<float>::quiet_NaN()),                  \
814          (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,                  \
815           std::numeric_limits<float>::quiet_NaN()),                           \
816          unord, Type);                                                        \
817    } else {                                                                   \
818      TestCmppsOrdUnordXmmXmm(64, Dst,                                         \
819                              (1.0, std::numeric_limits<double>::quiet_NaN()), \
820                              Src, (std::numeric_limits<double>::quiet_NaN(),  \
821                                    std::numeric_limits<double>::quiet_NaN()), \
822                              unord, Type);                                    \
823      TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src,                        \
824                              (1.0, std::numeric_limits<double>::quiet_NaN()), \
825                              unord, Type);                                    \
826      TestCmppsOrdUnordXmmAddr(                                                \
827          64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()),            \
828          (std::numeric_limits<double>::quiet_NaN(),                           \
829           std::numeric_limits<double>::quiet_NaN()),                          \
830          unord, Type);                                                        \
831      TestCmppsOrdUnordXmmAddr(                                                \
832          64, Dst, (1.0, 1.0),                                                 \
833          (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type);       \
834    }                                                                          \
835  } while (0)
836
837#define TestCmppsSize(FloatSize, Value0, Value1, Type)                         \
838  do {                                                                         \
839    TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type);                    \
840    TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type);                    \
841    TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type);                    \
842    TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type);                    \
843    TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type);                    \
844    TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type);                    \
845    TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type);                    \
846    TestCmpps(FloatSize, xmm7, Value0, xmm8, Value1, Type);                    \
847    TestCmpps(FloatSize, xmm8, Value0, xmm9, Value1, Type);                    \
848    TestCmpps(FloatSize, xmm9, Value0, xmm10, Value1, Type);                   \
849    TestCmpps(FloatSize, xmm10, Value0, xmm11, Value1, Type);                  \
850    TestCmpps(FloatSize, xmm11, Value0, xmm12, Value1, Type);                  \
851    TestCmpps(FloatSize, xmm12, Value0, xmm13, Value1, Type);                  \
852    TestCmpps(FloatSize, xmm13, Value0, xmm14, Value1, Type);                  \
853    TestCmpps(FloatSize, xmm14, Value0, xmm15, Value1, Type);                  \
854    TestCmpps(FloatSize, xmm15, Value0, xmm0, Value1, Type);                   \
855  } while (0)
856
857  TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
858                float);
859  TestCmppsSize(64, (1.0, -1000.0), (1.0, -1000.0), double);
860
861#undef TestCmpps
862#undef TestCmppsOrdUnordXmmAddr
863#undef TestCmppsOrdUnordXmmXmm
864#undef TestCmppsXmmAddr
865#undef TestCmppsXmmXmm
866}
867
868TEST_F(AssemblerX8664Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
869#define TestImplSingle(Dst, Inst, Expect)                                      \
870  do {                                                                         \
871    static constexpr char TestString[] = "(" #Dst ", " #Inst ")";              \
872    const uint32_t T0 = allocateDqword();                                      \
873    const Dqword V0(1.0, 4.0, 20.0, 3.14);                                     \
874                                                                               \
875    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
876    __ Inst(Encoded_Xmm_##Dst());                                              \
877                                                                               \
878    AssembledTest test = assemble();                                           \
879    test.setDqwordTo(T0, V0);                                                  \
880    test.run();                                                                \
881    ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString;                \
882    reset();                                                                   \
883  } while (0)
884
885#define TestImpl(Dst)                                                          \
886  do {                                                                         \
887    TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull),              \
888                                 uint64_t(0x3FE2D10B408F1BBDull)));            \
889    TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull),             \
890                                  uint64_t(0x3F1078003E64F000ull)));           \
891    TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull),        \
892                                       uint64_t(0x3EA310003D4CC000ull)));      \
893                                                                               \
894    TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull),              \
895                                 uint64_t(0x401C42FAE40282A8ull)));            \
896  } while (0)
897
898  TestImpl(xmm0);
899  TestImpl(xmm1);
900  TestImpl(xmm2);
901  TestImpl(xmm3);
902  TestImpl(xmm4);
903  TestImpl(xmm5);
904  TestImpl(xmm6);
905  TestImpl(xmm7);
906  TestImpl(xmm8);
907  TestImpl(xmm9);
908  TestImpl(xmm10);
909  TestImpl(xmm11);
910  TestImpl(xmm12);
911  TestImpl(xmm13);
912  TestImpl(xmm14);
913  TestImpl(xmm15);
914
915#undef TestImpl
916#undef TestImplSingle
917}
918
919TEST_F(AssemblerX8664Test, Unpck) {
920  const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull),
921                  uint64_t(0xCCCCCCCCDDDDDDDDull));
922  const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull),
923                  uint64_t(0x9999999988888888ull));
924
925  const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
926                                uint64_t(0xEEEEEEEEAAAAAAAAull));
927  const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
928                                uint64_t(0xEEEEEEEEFFFFFFFFull));
929  const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
930                                uint64_t(0x99999999CCCCCCCCull));
931  const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
932                                uint64_t(0x9999999988888888ull));
933
934#define TestImplSingle(Dst, Src, Inst)                                         \
935  do {                                                                         \
936    static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
937    const uint32_t T0 = allocateDqword();                                      \
938    const uint32_t T1 = allocateDqword();                                      \
939                                                                               \
940    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
941    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
942    __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());                         \
943                                                                               \
944    AssembledTest test = assemble();                                           \
945    test.setDqwordTo(T0, V0);                                                  \
946    test.setDqwordTo(T1, V1);                                                  \
947    test.run();                                                                \
948                                                                               \
949    ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
950    reset();                                                                   \
951  } while (0)
952
953#define TestImpl(Dst, Src)                                                     \
954  do {                                                                         \
955    TestImplSingle(Dst, Src, unpcklps);                                        \
956    TestImplSingle(Dst, Src, unpcklpd);                                        \
957    TestImplSingle(Dst, Src, unpckhps);                                        \
958    TestImplSingle(Dst, Src, unpckhpd);                                        \
959  } while (0)
960
961  TestImpl(xmm0, xmm1);
962  TestImpl(xmm1, xmm2);
963  TestImpl(xmm2, xmm3);
964  TestImpl(xmm3, xmm4);
965  TestImpl(xmm4, xmm5);
966  TestImpl(xmm5, xmm6);
967  TestImpl(xmm6, xmm7);
968  TestImpl(xmm7, xmm8);
969  TestImpl(xmm8, xmm9);
970  TestImpl(xmm9, xmm10);
971  TestImpl(xmm10, xmm11);
972  TestImpl(xmm11, xmm12);
973  TestImpl(xmm12, xmm13);
974  TestImpl(xmm13, xmm14);
975  TestImpl(xmm14, xmm15);
976  TestImpl(xmm15, xmm0);
977
978#undef TestImpl
979#undef TestImplSingle
980}
981
982TEST_F(AssemblerX8664Test, Shufp) {
983  const Dqword V0(uint64_t(0x1111111122222222ull),
984                  uint64_t(0x5555555577777777ull));
985  const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
986                  uint64_t(0xCCCCCCCCDDDDDDDDull));
987
988  const uint8_t pshufdImm = 0x63;
989  const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
990                              uint64_t(0xAAAAAAAADDDDDDDDull));
991
992  const uint8_t shufpsImm = 0xf9;
993  const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
994                              uint64_t(0xCCCCCCCCCCCCCCCCull));
995
996#define TestImplSingleXmmXmm(Dst, Src, Inst)                                   \
997  do {                                                                         \
998    static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
999    const uint32_t T0 = allocateDqword();                                      \
1000    const uint32_t T1 = allocateDqword();                                      \
1001                                                                               \
1002    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1003    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1004    __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),             \
1005            Immediate(Inst##Imm));                                             \
1006                                                                               \
1007    AssembledTest test = assemble();                                           \
1008    test.setDqwordTo(T0, V0);                                                  \
1009    test.setDqwordTo(T1, V1);                                                  \
1010    test.run();                                                                \
1011                                                                               \
1012    ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
1013    reset();                                                                   \
1014  } while (0)
1015
1016#define TestImplSingleXmmAddr(Dst, Inst)                                       \
1017  do {                                                                         \
1018    static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
1019    const uint32_t T0 = allocateDqword();                                      \
1020    const uint32_t T1 = allocateDqword();                                      \
1021                                                                               \
1022    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1023    __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1),                \
1024            Immediate(Inst##Imm));                                             \
1025                                                                               \
1026    AssembledTest test = assemble();                                           \
1027    test.setDqwordTo(T0, V0);                                                  \
1028    test.setDqwordTo(T1, V1);                                                  \
1029    test.run();                                                                \
1030                                                                               \
1031    ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
1032    reset();                                                                   \
1033  } while (0)
1034
1035#define TestImplSingleXmmXmmUntyped(Dst, Src, Inst)                            \
1036  do {                                                                         \
1037    static constexpr char TestString[] =                                       \
1038        "(" #Dst ", " #Src ", " #Inst ", Untyped)";                            \
1039    const uint32_t T0 = allocateDqword();                                      \
1040    const uint32_t T1 = allocateDqword();                                      \
1041                                                                               \
1042    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1043    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1044    __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Immediate(Inst##Imm));   \
1045                                                                               \
1046    AssembledTest test = assemble();                                           \
1047    test.setDqwordTo(T0, V0);                                                  \
1048    test.setDqwordTo(T1, V1);                                                  \
1049    test.run();                                                                \
1050                                                                               \
1051    ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString;        \
1052    reset();                                                                   \
1053  } while (0)
1054
1055#define TestImpl(Dst, Src)                                                     \
1056  do {                                                                         \
1057    TestImplSingleXmmXmm(Dst, Src, pshufd);                                    \
1058    TestImplSingleXmmAddr(Dst, pshufd);                                        \
1059    TestImplSingleXmmXmm(Dst, Src, shufps);                                    \
1060    TestImplSingleXmmAddr(Dst, shufps);                                        \
1061  } while (0)
1062
1063  TestImpl(xmm0, xmm1);
1064  TestImpl(xmm1, xmm2);
1065  TestImpl(xmm2, xmm3);
1066  TestImpl(xmm3, xmm4);
1067  TestImpl(xmm4, xmm5);
1068  TestImpl(xmm5, xmm6);
1069  TestImpl(xmm6, xmm7);
1070  TestImpl(xmm7, xmm8);
1071  TestImpl(xmm8, xmm9);
1072  TestImpl(xmm9, xmm10);
1073  TestImpl(xmm10, xmm11);
1074  TestImpl(xmm11, xmm12);
1075  TestImpl(xmm12, xmm13);
1076  TestImpl(xmm13, xmm14);
1077  TestImpl(xmm14, xmm15);
1078  TestImpl(xmm15, xmm0);
1079
1080#undef TestImpl
1081#undef TestImplSingleXmmXmmUntyped
1082#undef TestImplSingleXmmAddr
1083#undef TestImplSingleXmmXmm
1084}
1085
1086TEST_F(AssemblerX8664Test, Punpckl) {
1087  const Dqword V0_v4i32(uint64_t(0x1111111122222222ull),
1088                        uint64_t(0x5555555577777777ull));
1089  const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
1090                        uint64_t(0xCCCCCCCCDDDDDDDDull));
1091  const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull),
1092                              uint64_t(0xAAAAAAAA11111111ull));
1093
1094  const Dqword V0_v8i16(uint64_t(0x1111222233334444ull),
1095                        uint64_t(0x5555666677778888ull));
1096  const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
1097                        uint64_t(0xEEEEFFFF00009999ull));
1098  const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
1099                              uint64_t(0xAAAA1111BBBB2222ull));
1100
1101  const Dqword V0_v16i8(uint64_t(0x1122334455667788ull),
1102                        uint64_t(0x99AABBCCDDEEFF00ull));
1103  const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
1104                        uint64_t(0xBAADF00DFEEDFACEull));
1105  const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull),
1106                              uint64_t(0xFF11EE22DD33CC44ull));
1107
1108#define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1109  do {                                                                         \
1110    static constexpr char TestString[] =                                       \
1111        "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1112    const uint32_t T0 = allocateDqword();                                      \
1113    const uint32_t T1 = allocateDqword();                                      \
1114                                                                               \
1115    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1116    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1117    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1118            XmmRegister::Encoded_Reg_##Src);                                   \
1119                                                                               \
1120    AssembledTest test = assemble();                                           \
1121    test.setDqwordTo(T0, V0_##Ty);                                             \
1122    test.setDqwordTo(T1, V1_##Ty);                                             \
1123    test.run();                                                                \
1124                                                                               \
1125    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1126    reset();                                                                   \
1127  } while (0)
1128
1129#define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1130  do {                                                                         \
1131    static constexpr char TestString[] =                                       \
1132        "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1133    const uint32_t T0 = allocateDqword();                                      \
1134    const uint32_t T1 = allocateDqword();                                      \
1135                                                                               \
1136    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1137    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1138                                                                               \
1139    AssembledTest test = assemble();                                           \
1140    test.setDqwordTo(T0, V0_##Ty);                                             \
1141    test.setDqwordTo(T1, V1_##Ty);                                             \
1142    test.run();                                                                \
1143                                                                               \
1144    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1145    reset();                                                                   \
1146  } while (0)
1147
1148#define TestImpl(Dst, Src)                                                     \
1149  do {                                                                         \
1150    TestImplXmmXmm(Dst, Src, punpckl, v4i32);                                  \
1151    TestImplXmmAddr(Dst, punpckl, v4i32);                                      \
1152    TestImplXmmXmm(Dst, Src, punpckl, v8i16);                                  \
1153    TestImplXmmAddr(Dst, punpckl, v8i16);                                      \
1154    TestImplXmmXmm(Dst, Src, punpckl, v16i8);                                  \
1155    TestImplXmmAddr(Dst, punpckl, v16i8);                                      \
1156  } while (0)
1157
1158  TestImpl(xmm0, xmm1);
1159  TestImpl(xmm1, xmm2);
1160  TestImpl(xmm2, xmm3);
1161  TestImpl(xmm3, xmm4);
1162  TestImpl(xmm4, xmm5);
1163  TestImpl(xmm5, xmm6);
1164  TestImpl(xmm6, xmm7);
1165  TestImpl(xmm7, xmm0);
1166
1167#undef TestImpl
1168#undef TestImplXmmAddr
1169#undef TestImplXmmXmm
1170}
1171
1172TEST_F(AssemblerX8664Test, Packss) {
1173  const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1174                        uint64_t(0x7FFFFFFF80000000ull));
1175  const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1176                        uint64_t(0x0000800100007FFEull));
1177  const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
1178                              uint64_t(0x7FFF7FFEFFFEFFFFull));
1179
1180  const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1181                        uint64_t(0xFFFEFFFF7FFF8000ull));
1182  const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1183                        uint64_t(0x0088007700660055ull));
1184  const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
1185                              uint64_t(0x7F776655057F7F7Eull));
1186
1187#define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1188  do {                                                                         \
1189    static constexpr char TestString[] =                                       \
1190        "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1191    const uint32_t T0 = allocateDqword();                                      \
1192    const uint32_t T1 = allocateDqword();                                      \
1193                                                                               \
1194    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1195    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1196    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1197            XmmRegister::Encoded_Reg_##Src);                                   \
1198                                                                               \
1199    AssembledTest test = assemble();                                           \
1200    test.setDqwordTo(T0, V0_##Ty);                                             \
1201    test.setDqwordTo(T1, V1_##Ty);                                             \
1202    test.run();                                                                \
1203                                                                               \
1204    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1205    reset();                                                                   \
1206  } while (0)
1207
1208#define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1209  do {                                                                         \
1210    static constexpr char TestString[] =                                       \
1211        "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1212    const uint32_t T0 = allocateDqword();                                      \
1213    const uint32_t T1 = allocateDqword();                                      \
1214                                                                               \
1215    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1216    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1217                                                                               \
1218    AssembledTest test = assemble();                                           \
1219    test.setDqwordTo(T0, V0_##Ty);                                             \
1220    test.setDqwordTo(T1, V1_##Ty);                                             \
1221    test.run();                                                                \
1222                                                                               \
1223    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1224    reset();                                                                   \
1225  } while (0)
1226
1227#define TestImpl(Dst, Src)                                                     \
1228  do {                                                                         \
1229    TestImplXmmXmm(Dst, Src, packss, v4i32);                                   \
1230    TestImplXmmAddr(Dst, packss, v4i32);                                       \
1231    TestImplXmmXmm(Dst, Src, packss, v8i16);                                   \
1232    TestImplXmmAddr(Dst, packss, v8i16);                                       \
1233  } while (0)
1234
1235  TestImpl(xmm0, xmm1);
1236  TestImpl(xmm1, xmm2);
1237  TestImpl(xmm2, xmm3);
1238  TestImpl(xmm3, xmm4);
1239  TestImpl(xmm4, xmm5);
1240  TestImpl(xmm5, xmm6);
1241  TestImpl(xmm6, xmm7);
1242  TestImpl(xmm7, xmm0);
1243
1244#undef TestImpl
1245#undef TestImplXmmAddr
1246#undef TestImplXmmXmm
1247}
1248
1249TEST_F(AssemblerX8664Test, Packus) {
1250  const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1251                        uint64_t(0x7FFFFFFF80000000ull));
1252  const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1253                        uint64_t(0x0000800100007FFEull));
1254  const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
1255                              uint64_t(0x80017FFE00000000ull));
1256
1257  const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1258                        uint64_t(0xFFFEFFFF7FFF8000ull));
1259  const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1260                        uint64_t(0x0088007700660055ull));
1261  const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
1262                              uint64_t(0x8877665505FF817Eull));
1263
1264#define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1265  do {                                                                         \
1266    static constexpr char TestString[] =                                       \
1267        "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1268    const uint32_t T0 = allocateDqword();                                      \
1269    const uint32_t T1 = allocateDqword();                                      \
1270                                                                               \
1271    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1272    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1273    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1274            XmmRegister::Encoded_Reg_##Src);                                   \
1275                                                                               \
1276    AssembledTest test = assemble();                                           \
1277    test.setDqwordTo(T0, V0_##Ty);                                             \
1278    test.setDqwordTo(T1, V1_##Ty);                                             \
1279    test.run();                                                                \
1280                                                                               \
1281    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1282    reset();                                                                   \
1283  } while (0)
1284
1285#define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1286  do {                                                                         \
1287    static constexpr char TestString[] =                                       \
1288        "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1289    const uint32_t T0 = allocateDqword();                                      \
1290    const uint32_t T1 = allocateDqword();                                      \
1291                                                                               \
1292    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1293    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1294                                                                               \
1295    AssembledTest test = assemble();                                           \
1296    test.setDqwordTo(T0, V0_##Ty);                                             \
1297    test.setDqwordTo(T1, V1_##Ty);                                             \
1298    test.run();                                                                \
1299                                                                               \
1300    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1301    reset();                                                                   \
1302  } while (0)
1303
1304#define TestImpl(Dst, Src)                                                     \
1305  do {                                                                         \
1306    TestImplXmmXmm(Dst, Src, packus, v4i32);                                   \
1307    TestImplXmmAddr(Dst, packus, v4i32);                                       \
1308    TestImplXmmXmm(Dst, Src, packus, v8i16);                                   \
1309    TestImplXmmAddr(Dst, packus, v8i16);                                       \
1310  } while (0)
1311
1312  TestImpl(xmm0, xmm1);
1313  TestImpl(xmm1, xmm2);
1314  TestImpl(xmm2, xmm3);
1315  TestImpl(xmm3, xmm4);
1316  TestImpl(xmm4, xmm5);
1317  TestImpl(xmm5, xmm6);
1318  TestImpl(xmm6, xmm7);
1319  TestImpl(xmm7, xmm0);
1320
1321#undef TestImpl
1322#undef TestImplXmmAddr
1323#undef TestImplXmmXmm
1324}
1325
1326TEST_F(AssemblerX8664Test, Pshufb) {
1327  const Dqword V0(uint64_t(0x1122334455667788ull),
1328                  uint64_t(0x99aabbccddeeff32ull));
1329  const Dqword V1(uint64_t(0x0204050380060708ull),
1330                  uint64_t(0x010306080a8b0c0dull));
1331
1332  const Dqword Expected(uint64_t(0x6644335500221132ull),
1333                        uint64_t(0x77552232ee00ccbbull));
1334
1335#define TestImplXmmXmm(Dst, Src, Inst)                                         \
1336  do {                                                                         \
1337    static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
1338    const uint32_t T0 = allocateDqword();                                      \
1339    const uint32_t T1 = allocateDqword();                                      \
1340                                                                               \
1341    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1342    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1343    __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst,                      \
1344            XmmRegister::Encoded_Reg_##Src);                                   \
1345                                                                               \
1346    AssembledTest test = assemble();                                           \
1347    test.setDqwordTo(T0, V0);                                                  \
1348    test.setDqwordTo(T1, V1);                                                  \
1349    test.run();                                                                \
1350                                                                               \
1351    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1352    reset();                                                                   \
1353  } while (0)
1354
1355#define TestImplXmmAddr(Dst, Inst)                                             \
1356  do {                                                                         \
1357    static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
1358    const uint32_t T0 = allocateDqword();                                      \
1359    const uint32_t T1 = allocateDqword();                                      \
1360                                                                               \
1361    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1362    __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1363                                                                               \
1364    AssembledTest test = assemble();                                           \
1365    test.setDqwordTo(T0, V0);                                                  \
1366    test.setDqwordTo(T1, V1);                                                  \
1367    test.run();                                                                \
1368                                                                               \
1369    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1370    reset();                                                                   \
1371  } while (0)
1372
1373#define TestImpl(Dst, Src)                                                     \
1374  do {                                                                         \
1375    TestImplXmmXmm(Dst, Src, pshufb);                                          \
1376    TestImplXmmAddr(Dst, pshufb);                                              \
1377  } while (0)
1378
1379  TestImpl(xmm0, xmm1);
1380  TestImpl(xmm1, xmm2);
1381  TestImpl(xmm2, xmm3);
1382  TestImpl(xmm3, xmm4);
1383  TestImpl(xmm4, xmm5);
1384  TestImpl(xmm5, xmm6);
1385  TestImpl(xmm6, xmm7);
1386  TestImpl(xmm7, xmm8);
1387  TestImpl(xmm8, xmm9);
1388  TestImpl(xmm9, xmm10);
1389  TestImpl(xmm10, xmm11);
1390  TestImpl(xmm11, xmm12);
1391  TestImpl(xmm12, xmm13);
1392  TestImpl(xmm13, xmm14);
1393  TestImpl(xmm14, xmm15);
1394  TestImpl(xmm15, xmm0);
1395
1396#undef TestImpl
1397#undef TestImplXmmAddr
1398#undef TestImplXmmXmm
1399}
1400
1401TEST_F(AssemblerX8664Test, Cvt) {
1402  const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1403  const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
1404  const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
1405
1406  const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
1407  const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
1408  const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
1409
1410  const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1411  const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1412  const Dqword tps2dq32Expected(-5, 3, 100, 200);
1413
1414  const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1415  const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1416  const Dqword tps2dq64Expected(-5, 3, 100, 200);
1417
1418  const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1419  const int32_t si2ss32SrcValue = 5;
1420  const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
1421
1422  const Dqword si2ss64DstValue(-1.0, -1.0);
1423  const int32_t si2ss64SrcValue = 5;
1424  const Dqword si2ss64Expected(5.0, -1.0);
1425
1426  const int32_t tss2si32DstValue = 0xF00F0FF0;
1427  const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
1428  const int32_t tss2si32Expected = -5;
1429
1430  const int32_t tss2si64DstValue = 0xF00F0FF0;
1431  const Dqword tss2si64SrcValue(-5.0, -1.0);
1432  const int32_t tss2si64Expected = -5;
1433
1434  const Dqword float2float32DstValue(-1.0, -1.0);
1435  const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
1436  const Dqword float2float32Expected(-5.0, -1.0);
1437
1438  const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
1439  const Dqword float2float64SrcValue(-5.0, 3.0);
1440  const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
1441
1442#define TestImplPXmmXmm(Dst, Src, Inst, Size)                                  \
1443  do {                                                                         \
1444    static constexpr char TestString[] =                                       \
1445        "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
1446    const uint32_t T0 = allocateDqword();                                      \
1447    const uint32_t T1 = allocateDqword();                                      \
1448                                                                               \
1449    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1450    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1451    __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
1452                                                                               \
1453    AssembledTest test = assemble();                                           \
1454    test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1455    test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1456    test.run();                                                                \
1457                                                                               \
1458    ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1459    reset();                                                                   \
1460  } while (0)
1461
1462#define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType)                         \
1463  do {                                                                         \
1464    static constexpr char TestString[] =                                       \
1465        "(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")";        \
1466    const uint32_t T0 = allocateDqword();                                      \
1467                                                                               \
1468    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1469    __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
1470    __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType,                \
1471                 Encoded_GPR_##GPR());                                         \
1472                                                                               \
1473    AssembledTest test = assemble();                                           \
1474    test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1475    test.run();                                                                \
1476                                                                               \
1477    ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1478    reset();                                                                   \
1479  } while (0)
1480
1481#define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size)                         \
1482  do {                                                                         \
1483    static constexpr char TestString[] =                                       \
1484        "(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")";        \
1485    const uint32_t T0 = allocateDqword();                                      \
1486                                                                               \
1487    __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
1488    __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
1489    __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size,     \
1490                 Encoded_Xmm_##Src());                                         \
1491                                                                               \
1492    AssembledTest test = assemble();                                           \
1493    test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1494    test.run();                                                                \
1495                                                                               \
1496    ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected),            \
1497              test.GPR())                                                      \
1498        << TestString;                                                         \
1499    reset();                                                                   \
1500  } while (0)
1501
1502#define TestImplPXmmAddr(Dst, Inst, Size)                                      \
1503  do {                                                                         \
1504    static constexpr char TestString[] =                                       \
1505        "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
1506    const uint32_t T0 = allocateDqword();                                      \
1507    const uint32_t T1 = allocateDqword();                                      \
1508                                                                               \
1509    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1510    __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));      \
1511                                                                               \
1512    AssembledTest test = assemble();                                           \
1513    test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1514    test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1515    test.run();                                                                \
1516                                                                               \
1517    ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1518    reset();                                                                   \
1519  } while (0)
1520
1521#define TestImplSXmmAddr(Dst, Inst, Size, IntType)                             \
1522  do {                                                                         \
1523    static constexpr char TestString[] =                                       \
1524        "(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")";            \
1525    const uint32_t T0 = allocateDqword();                                      \
1526    const uint32_t T1 = allocateDword();                                       \
1527                                                                               \
1528    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1529    __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType,                \
1530                 dwordAddress(T1));                                            \
1531                                                                               \
1532    AssembledTest test = assemble();                                           \
1533    test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1534    test.setDwordTo(T1, Inst##Size##SrcValue);                                 \
1535    test.run();                                                                \
1536                                                                               \
1537    ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1538    reset();                                                                   \
1539  } while (0)
1540
1541#define TestImplSRegAddr(GPR, Inst, IntSize, Size)                             \
1542  do {                                                                         \
1543    static constexpr char TestString[] =                                       \
1544        "(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")";            \
1545    const uint32_t T0 = allocateDqword();                                      \
1546                                                                               \
1547    __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
1548    __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size,     \
1549                 dwordAddress(T0));                                            \
1550                                                                               \
1551    AssembledTest test = assemble();                                           \
1552    test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1553    test.run();                                                                \
1554                                                                               \
1555    ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected),            \
1556              test.GPR())                                                      \
1557        << TestString;                                                         \
1558    reset();                                                                   \
1559  } while (0)
1560
1561#define TestImplSize(Dst, Src, GPR, Size)                                      \
1562  do {                                                                         \
1563    TestImplPXmmXmm(Dst, Src, dq2ps, Size);                                    \
1564    TestImplPXmmAddr(Src, dq2ps, Size);                                        \
1565    TestImplPXmmXmm(Dst, Src, tps2dq, Size);                                   \
1566    TestImplPXmmAddr(Src, tps2dq, Size);                                       \
1567    TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32);                       \
1568    TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64);                       \
1569    TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32);                           \
1570    TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64);                           \
1571    TestImplSRegXmm(GPR, Src, tss2si, 32, Size);                               \
1572    TestImplSRegXmm(GPR, Src, tss2si, 64, Size);                               \
1573    TestImplSRegAddr(GPR, tss2si, 32, Size);                                   \
1574    TestImplSRegAddr(GPR, tss2si, 64, Size);                                   \
1575    TestImplPXmmXmm(Dst, Src, float2float, Size);                              \
1576    TestImplPXmmAddr(Src, float2float, Size);                                  \
1577  } while (0)
1578
1579#define TestImpl(Dst, Src, GPR)                                                \
1580  do {                                                                         \
1581    TestImplSize(Dst, Src, GPR, 32);                                           \
1582    TestImplSize(Dst, Src, GPR, 64);                                           \
1583  } while (0)
1584
1585  TestImpl(xmm0, xmm1, r1);
1586  TestImpl(xmm1, xmm2, r2);
1587  TestImpl(xmm2, xmm3, r3);
1588  TestImpl(xmm3, xmm4, r4);
1589  TestImpl(xmm4, xmm5, r5);
1590  TestImpl(xmm5, xmm6, r6);
1591  TestImpl(xmm6, xmm7, r7);
1592  TestImpl(xmm7, xmm8, r8);
1593  TestImpl(xmm8, xmm9, r10);
1594  TestImpl(xmm9, xmm10, r11);
1595  TestImpl(xmm10, xmm11, r12);
1596  TestImpl(xmm11, xmm12, r13);
1597  TestImpl(xmm12, xmm13, r14);
1598  TestImpl(xmm13, xmm14, r15);
1599  TestImpl(xmm14, xmm15, r1);
1600  TestImpl(xmm15, xmm0, r2);
1601
1602#undef TestImpl
1603#undef TestImplSize
1604#undef TestImplSRegAddr
1605#undef TestImplSXmmAddr
1606#undef TestImplPXmmAddr
1607#undef TestImplSRegXmm
1608#undef TestImplSXmmReg
1609#undef TestImplPXmmXmm
1610}
1611
1612TEST_F(AssemblerX8664Test, Ucomiss) {
1613  static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
1614  static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN();
1615
1616  Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
1617  Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
1618
1619  Dqword test64DstValue(0.0, qnan64);
1620  Dqword test64SrcValue(0.0, qnan64);
1621
1622#define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity,      \
1623                       BOther)                                                 \
1624  do {                                                                         \
1625    static constexpr char NearBranch = AssemblerX8664::kNearJump;              \
1626    static constexpr char TestString[] =                                       \
1627        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
1628        ", " #BParity ", " #BOther ")";                                        \
1629    const uint32_t T0 = allocateDqword();                                      \
1630    test##Size##DstValue.F##Size[0] = Value0;                                  \
1631    const uint32_t T1 = allocateDqword();                                      \
1632    test##Size##SrcValue.F##Size[0] = Value1;                                  \
1633    const uint32_t ImmIfTrue = 0xBEEF;                                         \
1634    const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1635                                                                               \
1636    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1637    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1638    __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1639    __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());     \
1640    Label Done;                                                                \
1641    __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1642    __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1643    __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1644    __ bind(&Done);                                                            \
1645                                                                               \
1646    AssembledTest test = assemble();                                           \
1647    test.setDqwordTo(T0, test##Size##DstValue);                                \
1648    test.setDqwordTo(T1, test##Size##SrcValue);                                \
1649    test.run();                                                                \
1650                                                                               \
1651    ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1652    reset();                                                                   \
1653  } while (0)
1654
1655#define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther)  \
1656  do {                                                                         \
1657    static constexpr char NearBranch = AssemblerX8664::kNearJump;              \
1658    static constexpr char TestString[] =                                       \
1659        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType     \
1660        ", " #BParity ", " #BOther ")";                                        \
1661    const uint32_t T0 = allocateDqword();                                      \
1662    test##Size##DstValue.F##Size[0] = Value0;                                  \
1663    const uint32_t T1 = allocateDqword();                                      \
1664    test##Size##SrcValue.F##Size[0] = Value1;                                  \
1665    const uint32_t ImmIfTrue = 0xBEEF;                                         \
1666    const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1667                                                                               \
1668    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1669    __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1670    __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));        \
1671    Label Done;                                                                \
1672    __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1673    __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1674    __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1675    __ bind(&Done);                                                            \
1676                                                                               \
1677    AssembledTest test = assemble();                                           \
1678    test.setDqwordTo(T0, test##Size##DstValue);                                \
1679    test.setDqwordTo(T1, test##Size##SrcValue);                                \
1680    test.run();                                                                \
1681                                                                               \
1682    ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1683    reset();                                                                   \
1684  } while (0)
1685
1686#define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity,        \
1687                     BOther)                                                   \
1688  do {                                                                         \
1689    TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
1690    TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther);     \
1691  } while (0)
1692
1693#define TestImplSize(Dst, Src, Size)                                           \
1694  do {                                                                         \
1695    TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne);                       \
1696    TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e);                        \
1697    TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a);                        \
1698    TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a);                        \
1699    TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae);                       \
1700    TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b);                        \
1701    TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b);                        \
1702    TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be);                       \
1703    TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o);             \
1704    TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s);             \
1705    TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s);      \
1706  } while (0)
1707
1708#define TestImpl(Dst, Src)                                                     \
1709  do {                                                                         \
1710    TestImplSize(Dst, Src, 32);                                                \
1711    TestImplSize(Dst, Src, 64);                                                \
1712  } while (0)
1713
1714  TestImpl(xmm0, xmm1);
1715  TestImpl(xmm1, xmm2);
1716  TestImpl(xmm2, xmm3);
1717  TestImpl(xmm3, xmm4);
1718  TestImpl(xmm4, xmm5);
1719  TestImpl(xmm5, xmm6);
1720  TestImpl(xmm6, xmm7);
1721  TestImpl(xmm7, xmm8);
1722  TestImpl(xmm8, xmm9);
1723  TestImpl(xmm9, xmm10);
1724  TestImpl(xmm10, xmm11);
1725  TestImpl(xmm11, xmm12);
1726  TestImpl(xmm12, xmm13);
1727  TestImpl(xmm13, xmm14);
1728  TestImpl(xmm14, xmm15);
1729  TestImpl(xmm15, xmm0);
1730
1731#undef TestImpl
1732#undef TestImplSize
1733#undef TestImplCond
1734#undef TestImplXmmAddr
1735#undef TestImplXmmXmm
1736}
1737
1738TEST_F(AssemblerX8664Test, Sqrtss) {
1739  Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0);
1740  Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0);
1741
1742  Dqword test64SrcValue(-100.0, -100.0);
1743  Dqword test64DstValue(-1.0, -1.0);
1744
1745#define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size)                       \
1746  do {                                                                         \
1747    static constexpr char TestString[] =                                       \
1748        "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")";           \
1749    const uint32_t T0 = allocateDqword();                                      \
1750    test##Size##SrcValue.F##Size[0] = Value1;                                  \
1751    const uint32_t T1 = allocateDqword();                                      \
1752                                                                               \
1753    __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
1754    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
1755    __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
1756                                                                               \
1757    AssembledTest test = assemble();                                           \
1758    test.setDqwordTo(T0, test##Size##SrcValue);                                \
1759    test.setDqwordTo(T1, test##Size##DstValue);                                \
1760    test.run();                                                                \
1761                                                                               \
1762    Dqword Expected = test##Size##DstValue;                                    \
1763    Expected.F##Size[0] = Result;                                              \
1764    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1765    reset();                                                                   \
1766  } while (0)
1767
1768#define TestSqrtssXmmAddr(Dst, Value1, Result, Size)                           \
1769  do {                                                                         \
1770    static constexpr char TestString[] =                                       \
1771        "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")";               \
1772    const uint32_t T0 = allocateDqword();                                      \
1773    test##Size##SrcValue.F##Size[0] = Value1;                                  \
1774    const uint32_t T1 = allocateDqword();                                      \
1775                                                                               \
1776    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
1777    __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0));           \
1778                                                                               \
1779    AssembledTest test = assemble();                                           \
1780    test.setDqwordTo(T0, test##Size##SrcValue);                                \
1781    test.setDqwordTo(T1, test##Size##DstValue);                                \
1782    test.run();                                                                \
1783                                                                               \
1784    Dqword Expected = test##Size##DstValue;                                    \
1785    Expected.F##Size[0] = Result;                                              \
1786    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1787    reset();                                                                   \
1788  } while (0)
1789
1790#define TestSqrtssSize(Dst, Src, Size)                                         \
1791  do {                                                                         \
1792    TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size);                                \
1793    TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size);                                    \
1794    TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size);                                \
1795    TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size);                                    \
1796    TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size);                             \
1797    TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size);                                 \
1798  } while (0)
1799
1800#define TestSqrtss(Dst, Src)                                                   \
1801  do {                                                                         \
1802    TestSqrtssSize(Dst, Src, 32);                                              \
1803    TestSqrtssSize(Dst, Src, 64);                                              \
1804  } while (0)
1805
1806  TestSqrtss(xmm0, xmm1);
1807  TestSqrtss(xmm1, xmm2);
1808  TestSqrtss(xmm2, xmm3);
1809  TestSqrtss(xmm3, xmm4);
1810  TestSqrtss(xmm4, xmm5);
1811  TestSqrtss(xmm5, xmm6);
1812  TestSqrtss(xmm6, xmm7);
1813  TestSqrtss(xmm7, xmm8);
1814  TestSqrtss(xmm8, xmm9);
1815  TestSqrtss(xmm9, xmm10);
1816  TestSqrtss(xmm10, xmm11);
1817  TestSqrtss(xmm11, xmm12);
1818  TestSqrtss(xmm12, xmm13);
1819  TestSqrtss(xmm13, xmm14);
1820  TestSqrtss(xmm14, xmm15);
1821  TestSqrtss(xmm15, xmm0);
1822
1823#undef TestSqrtss
1824#undef TestSqrtssSize
1825#undef TestSqrtssXmmAddr
1826#undef TestSqrtssXmmXmm
1827}
1828
1829TEST_F(AssemblerX8664Test, Insertps) {
1830#define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected)         \
1831  do {                                                                         \
1832    static constexpr char TestString[] =                                       \
1833        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected  \
1834        ")";                                                                   \
1835    const uint32_t T0 = allocateDqword();                                      \
1836    const Dqword V0 Value0;                                                    \
1837    const uint32_t T1 = allocateDqword();                                      \
1838    const Dqword V1 Value1;                                                    \
1839                                                                               \
1840    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1841    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1842    __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),       \
1843                Immediate(Imm));                                               \
1844                                                                               \
1845    AssembledTest test = assemble();                                           \
1846    test.setDqwordTo(T0, V0);                                                  \
1847    test.setDqwordTo(T1, V1);                                                  \
1848    test.run();                                                                \
1849                                                                               \
1850    ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1851    reset();                                                                   \
1852  } while (0)
1853
1854#define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)             \
1855  do {                                                                         \
1856    static constexpr char TestString[] =                                       \
1857        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
1858    const uint32_t T0 = allocateDqword();                                      \
1859    const Dqword V0 Value0;                                                    \
1860    const uint32_t T1 = allocateDqword();                                      \
1861    const Dqword V1 Value1;                                                    \
1862                                                                               \
1863    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1864    __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), dwordAddress(T1),          \
1865                Immediate(Imm));                                               \
1866                                                                               \
1867    AssembledTest test = assemble();                                           \
1868    test.setDqwordTo(T0, V0);                                                  \
1869    test.setDqwordTo(T1, V1);                                                  \
1870    test.run();                                                                \
1871                                                                               \
1872    ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1873    reset();                                                                   \
1874  } while (0)
1875
1876#define TestInsertps(Dst, Src)                                                 \
1877  do {                                                                         \
1878    TestInsertpsXmmXmmImm(                                                     \
1879        Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1880        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1881        0x99,                                                                  \
1882        (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1883    TestInsertpsXmmAddrImm(                                                    \
1884        Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1885        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1886        0x99,                                                                  \
1887        (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1888    TestInsertpsXmmXmmImm(                                                     \
1889        Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1890        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1891        0x9D,                                                                  \
1892        (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull)));   \
1893    TestInsertpsXmmAddrImm(                                                    \
1894        Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1895        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1896        0x9D,                                                                  \
1897        (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull)));   \
1898  } while (0)
1899
1900  TestInsertps(xmm0, xmm1);
1901  TestInsertps(xmm1, xmm2);
1902  TestInsertps(xmm2, xmm3);
1903  TestInsertps(xmm3, xmm4);
1904  TestInsertps(xmm4, xmm5);
1905  TestInsertps(xmm5, xmm6);
1906  TestInsertps(xmm6, xmm7);
1907  TestInsertps(xmm7, xmm8);
1908  TestInsertps(xmm8, xmm9);
1909  TestInsertps(xmm9, xmm10);
1910  TestInsertps(xmm10, xmm11);
1911  TestInsertps(xmm11, xmm12);
1912  TestInsertps(xmm12, xmm13);
1913  TestInsertps(xmm13, xmm14);
1914  TestInsertps(xmm14, xmm15);
1915  TestInsertps(xmm15, xmm0);
1916
1917#undef TestInsertps
1918#undef TestInsertpsXmmXmmAddr
1919#undef TestInsertpsXmmXmmImm
1920}
1921
1922TEST_F(AssemblerX8664Test, Pinsr) {
1923  static constexpr uint8_t Mask32 = 0x03;
1924  static constexpr uint8_t Mask16 = 0x07;
1925  static constexpr uint8_t Mask8 = 0x0F;
1926
1927#define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size)                \
1928  do {                                                                         \
1929    static constexpr char TestString[] =                                       \
1930        "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
1931    const uint32_t T0 = allocateDqword();                                      \
1932    const Dqword V0 Value0;                                                    \
1933                                                                               \
1934    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1935    __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Value1));               \
1936    __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR(),        \
1937             Immediate(Imm));                                                  \
1938                                                                               \
1939    AssembledTest test = assemble();                                           \
1940    test.setDqwordTo(T0, V0);                                                  \
1941    test.run();                                                                \
1942                                                                               \
1943    constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1944    Dqword Expected = V0;                                                      \
1945    Expected.U##Size[sel] = Value1;                                            \
1946    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1947    reset();                                                                   \
1948  } while (0)
1949
1950#define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size)                    \
1951  do {                                                                         \
1952    static constexpr char TestString[] =                                       \
1953        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")";     \
1954    const uint32_t T0 = allocateDqword();                                      \
1955    const Dqword V0 Value0;                                                    \
1956    const uint32_t T1 = allocateDword();                                       \
1957    const uint32_t V1 = Value1;                                                \
1958                                                                               \
1959    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1960    __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1),           \
1961             Immediate(Imm));                                                  \
1962                                                                               \
1963    AssembledTest test = assemble();                                           \
1964    test.setDqwordTo(T0, V0);                                                  \
1965    test.setDwordTo(T1, V1);                                                   \
1966    test.run();                                                                \
1967                                                                               \
1968    constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1969    Dqword Expected = V0;                                                      \
1970    Expected.U##Size[sel] = Value1;                                            \
1971    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1972    reset();                                                                   \
1973  } while (0)
1974
1975#define TestPinsrSize(Dst, GPR, Value1, Imm, Size)                             \
1976  do {                                                                         \
1977    TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                  \
1978                             uint64_t(0xFFFFFFFFDDDDDDDDull)),                 \
1979                       GPR, Value1, Imm, Size);                                \
1980    TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                 \
1981                              uint64_t(0xFFFFFFFFDDDDDDDDull)),                \
1982                        Value1, Imm, Size);                                    \
1983  } while (0)
1984
1985#define TestPinsr(Src, Dst)                                                    \
1986  do {                                                                         \
1987    TestPinsrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
1988    TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
1989    TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
1990  } while (0)
1991
1992  TestPinsr(xmm0, r1);
1993  TestPinsr(xmm1, r2);
1994  TestPinsr(xmm2, r3);
1995  TestPinsr(xmm3, r4);
1996  TestPinsr(xmm4, r5);
1997  TestPinsr(xmm5, r6);
1998  TestPinsr(xmm6, r7);
1999  TestPinsr(xmm7, r8);
2000  TestPinsr(xmm8, r10);
2001  TestPinsr(xmm9, r11);
2002  TestPinsr(xmm10, r12);
2003  TestPinsr(xmm11, r13);
2004  TestPinsr(xmm12, r14);
2005  TestPinsr(xmm13, r15);
2006  TestPinsr(xmm14, r1);
2007  TestPinsr(xmm15, r2);
2008
2009#undef TestPinsr
2010#undef TestPinsrSize
2011#undef TestPinsrXmmAddrImm
2012#undef TestPinsrXmmGPRImm
2013}
2014
2015TEST_F(AssemblerX8664Test, Pextr) {
2016  static constexpr uint8_t Mask32 = 0x03;
2017  static constexpr uint8_t Mask16 = 0x07;
2018  static constexpr uint8_t Mask8 = 0x0F;
2019
2020#define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size)                        \
2021  do {                                                                         \
2022    static constexpr char TestString[] =                                       \
2023        "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")";              \
2024    const uint32_t T0 = allocateDqword();                                      \
2025    const Dqword V0 Value1;                                                    \
2026                                                                               \
2027    __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
2028    __ pextr(IceType_i##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src(),        \
2029             Immediate(Imm));                                                  \
2030                                                                               \
2031    AssembledTest test = assemble();                                           \
2032    test.setDqwordTo(T0, V0);                                                  \
2033    test.run();                                                                \
2034                                                                               \
2035    constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
2036    ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString;                      \
2037    reset();                                                                   \
2038  } while (0)
2039
2040#define TestPextrSize(GPR, Src, Value1, Imm, Size)                             \
2041  do {                                                                         \
2042    TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull),             \
2043                                  uint64_t(0xFFFFFFFFDDDDDDDDull)),            \
2044                       Imm, Size);                                             \
2045  } while (0)
2046
2047#define TestPextr(Src, Dst)                                                    \
2048  do {                                                                         \
2049    TestPextrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
2050    TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
2051    TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
2052  } while (0)
2053
2054  TestPextr(r1, xmm0);
2055  TestPextr(r2, xmm1);
2056  TestPextr(r3, xmm2);
2057  TestPextr(r4, xmm3);
2058  TestPextr(r5, xmm4);
2059  TestPextr(r6, xmm5);
2060  TestPextr(r7, xmm6);
2061  TestPextr(r8, xmm7);
2062  TestPextr(r10, xmm8);
2063  TestPextr(r11, xmm9);
2064  TestPextr(r12, xmm10);
2065  TestPextr(r13, xmm11);
2066  TestPextr(r14, xmm12);
2067  TestPextr(r15, xmm13);
2068  TestPextr(r1, xmm14);
2069  TestPextr(r2, xmm15);
2070
2071#undef TestPextr
2072#undef TestPextrSize
2073#undef TestPextrXmmGPRImm
2074}
2075
2076TEST_F(AssemblerX8664Test, Pcmpeq_Pcmpgt) {
2077#define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op)               \
2078  do {                                                                         \
2079    static constexpr char TestString[] =                                       \
2080        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")";  \
2081    const uint32_t T0 = allocateDqword();                                      \
2082    const Dqword V0 Value0;                                                    \
2083    const uint32_t T1 = allocateDqword();                                      \
2084    const Dqword V1 Value1;                                                    \
2085                                                                               \
2086    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
2087    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
2088    __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
2089                                                                               \
2090    AssembledTest test = assemble();                                           \
2091    test.setDqwordTo(T0, V0);                                                  \
2092    test.setDqwordTo(T1, V1);                                                  \
2093    test.run();                                                                \
2094                                                                               \
2095    Dqword Expected(uint64_t(0), uint64_t(0));                                 \
2096    static constexpr uint8_t ArraySize =                                       \
2097        sizeof(Dqword) / sizeof(uint##Size##_t);                               \
2098    for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
2099      Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
2100    }                                                                          \
2101    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2102    reset();                                                                   \
2103  } while (0)
2104
2105#define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op)                   \
2106  do {                                                                         \
2107    static constexpr char TestString[] =                                       \
2108        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")";      \
2109    const uint32_t T0 = allocateDqword();                                      \
2110    const Dqword V0 Value0;                                                    \
2111    const uint32_t T1 = allocateDqword();                                      \
2112    const Dqword V1 Value1;                                                    \
2113                                                                               \
2114    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
2115    __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));           \
2116                                                                               \
2117    AssembledTest test = assemble();                                           \
2118    test.setDqwordTo(T0, V0);                                                  \
2119    test.setDqwordTo(T1, V1);                                                  \
2120    test.run();                                                                \
2121                                                                               \
2122    Dqword Expected(uint64_t(0), uint64_t(0));                                 \
2123    static constexpr uint8_t ArraySize =                                       \
2124        sizeof(Dqword) / sizeof(uint##Size##_t);                               \
2125    for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
2126      Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
2127    }                                                                          \
2128    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2129    reset();                                                                   \
2130  } while (0)
2131
2132#define TestPcmpValues(Dst, Value0, Src, Value1, Size)                         \
2133  do {                                                                         \
2134    TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == );               \
2135    TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == );                   \
2136    TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < );                \
2137    TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < );                    \
2138  } while (0)
2139
2140#define TestPcmpSize(Dst, Src, Size)                                           \
2141  do {                                                                         \
2142    TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull),                      \
2143                         uint64_t(0x0000000000000000ull)),                     \
2144                   Src, (uint64_t(0x0000008800008800ull),                      \
2145                         uint64_t(0xFFFFFFFFFFFFFFFFull)),                     \
2146                   Size);                                                      \
2147    TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull),                      \
2148                         uint64_t(0x12345abcde12345Aull)),                     \
2149                   Src, (uint64_t(0x0000008800008800ull),                      \
2150                         uint64_t(0xAABBCCDD1234321Aull)),                     \
2151                   Size);                                                      \
2152  } while (0)
2153
2154#define TestPcmp(Dst, Src)                                                     \
2155  do {                                                                         \
2156    TestPcmpSize(xmm0, xmm1, 8);                                               \
2157    TestPcmpSize(xmm0, xmm1, 16);                                              \
2158    TestPcmpSize(xmm0, xmm1, 32);                                              \
2159  } while (0)
2160
2161  TestPcmp(xmm0, xmm1);
2162  TestPcmp(xmm1, xmm2);
2163  TestPcmp(xmm2, xmm3);
2164  TestPcmp(xmm3, xmm4);
2165  TestPcmp(xmm4, xmm5);
2166  TestPcmp(xmm5, xmm6);
2167  TestPcmp(xmm6, xmm7);
2168  TestPcmp(xmm7, xmm8);
2169  TestPcmp(xmm8, xmm9);
2170  TestPcmp(xmm9, xmm10);
2171  TestPcmp(xmm10, xmm11);
2172  TestPcmp(xmm11, xmm12);
2173  TestPcmp(xmm12, xmm13);
2174  TestPcmp(xmm13, xmm14);
2175  TestPcmp(xmm14, xmm15);
2176  TestPcmp(xmm15, xmm0);
2177
2178#undef TestPcmp
2179#undef TestPcmpSize
2180#undef TestPcmpValues
2181#undef TestPcmpXmmAddr
2182#undef TestPcmpXmmXmm
2183}
2184
2185TEST_F(AssemblerX8664Test, Roundsd) {
2186#define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN)                           \
2187  do {                                                                         \
2188    static constexpr char TestString[] =                                       \
2189        "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")";                \
2190    const uint32_t T0 = allocateDqword();                                      \
2191    const Dqword V0(-3.0, -3.0);                                               \
2192    const uint32_t T1 = allocateDqword();                                      \
2193    const Dqword V1(double(Input), -123.4);                                    \
2194                                                                               \
2195    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
2196    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
2197    __ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),            \
2198             Immediate(AssemblerX8664::k##Mode));                              \
2199                                                                               \
2200    AssembledTest test = assemble();                                           \
2201    test.setDqwordTo(T0, V0);                                                  \
2202    test.setDqwordTo(T1, V1);                                                  \
2203    test.run();                                                                \
2204                                                                               \
2205    const Dqword Expected(double(RN), -3.0);                                   \
2206    EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2207    reset();                                                                   \
2208  } while (0)
2209
2210#define TestRoundsd(Dst, Src)                                                  \
2211  do {                                                                         \
2212    TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6);                      \
2213    TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5);                      \
2214    TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5);                           \
2215    TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6);                             \
2216    TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5);                         \
2217    TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5);                         \
2218  } while (0)
2219
2220  TestRoundsd(xmm0, xmm1);
2221  TestRoundsd(xmm1, xmm2);
2222  TestRoundsd(xmm2, xmm3);
2223  TestRoundsd(xmm3, xmm4);
2224  TestRoundsd(xmm4, xmm5);
2225  TestRoundsd(xmm5, xmm6);
2226  TestRoundsd(xmm6, xmm7);
2227  TestRoundsd(xmm7, xmm8);
2228  TestRoundsd(xmm8, xmm9);
2229  TestRoundsd(xmm9, xmm10);
2230  TestRoundsd(xmm10, xmm11);
2231  TestRoundsd(xmm11, xmm12);
2232  TestRoundsd(xmm12, xmm13);
2233  TestRoundsd(xmm13, xmm14);
2234  TestRoundsd(xmm14, xmm15);
2235  TestRoundsd(xmm15, xmm0);
2236
2237#undef TestRoundsd
2238#undef TestRoundsdXmmXmm
2239}
2240
2241TEST_F(AssemblerX8664Test, Set1ps) {
2242#define TestImpl(Xmm, Src, Imm)                                                \
2243  do {                                                                         \
2244    __ set1ps(Encoded_Xmm_##Xmm(), Encoded_GPR_##Src(), Immediate(Imm));       \
2245                                                                               \
2246    AssembledTest test = assemble();                                           \
2247    test.run();                                                                \
2248                                                                               \
2249    const Dqword Expected((uint64_t(Imm) << 32) | uint32_t(Imm),               \
2250                          (uint64_t(Imm) << 32) | uint32_t(Imm));              \
2251    ASSERT_EQ(Expected, test.Xmm<Dqword>())                                    \
2252        << "(" #Xmm ", " #Src ", " #Imm ")";                                   \
2253    reset();                                                                   \
2254  } while (0)
2255
2256  TestImpl(xmm0, r1, 1);
2257  TestImpl(xmm1, r2, 12);
2258  TestImpl(xmm2, r3, 22);
2259  TestImpl(xmm3, r4, 54);
2260  TestImpl(xmm4, r5, 80);
2261  TestImpl(xmm5, r6, 32);
2262  TestImpl(xmm6, r7, 55);
2263  TestImpl(xmm7, r8, 44);
2264  TestImpl(xmm8, r10, 10);
2265  TestImpl(xmm9, r11, 155);
2266  TestImpl(xmm10, r12, 165);
2267  TestImpl(xmm11, r13, 170);
2268  TestImpl(xmm12, r14, 200);
2269  TestImpl(xmm13, r15, 124);
2270  TestImpl(xmm14, r1, 101);
2271  TestImpl(xmm15, r2, 166);
2272
2273#undef TestImpl
2274}
2275
2276} // end of anonymous namespace
2277} // end of namespace Test
2278} // end of namespace X8664
2279} // end of namespace Ice
2280