X86InstrAVX512.td revision f58e4144054b85e855c57c86eb058a6bb1907552
1// Bitcasts between 512-bit vector types. Return the original type since
2// no instruction is needed for the conversion
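// As an illustration: because the patterns below return $src unchanged, a cast
// such as (v8i64 (bitconvert (v16f32 VR512:$src))) selects to the source
// register itself and no move is emitted. At the C level this corresponds to
// casts like _mm512_castps_si512, which are expected to be free (assuming the
// standard AVX-512 cast intrinsic names).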
3let Predicates = [HasAVX512] in {
4  def : Pat<(v8f64  (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
5  def : Pat<(v8f64  (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
6  def : Pat<(v8f64  (bitconvert (v8i64 VR512:$src))),  (v8f64 VR512:$src)>;
7  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
8  def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))),  (v16f32 VR512:$src)>;
9  def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))),  (v16f32 VR512:$src)>;
10  def : Pat<(v8i64  (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
11  def : Pat<(v8i64  (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
12  def : Pat<(v8i64  (bitconvert (v8f64 VR512:$src))),  (v8i64 VR512:$src)>;
13  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
14  def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))),  (v16i32 VR512:$src)>;
15  def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))),  (v16i32 VR512:$src)>;
16  def : Pat<(v8f64  (bitconvert (v8i64 VR512:$src))),  (v8f64 VR512:$src)>;
17
18  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
19  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
20  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
21  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
22  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
23  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
24  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
25  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
26  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
27  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
28  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
29  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
30  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
31  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
32  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
33  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
34  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
35  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
36  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
37  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
38  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
39  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
40  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
41  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
42  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
43  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
44  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
45  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
46  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
47  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
48
49// Bitcasts between 256-bit vector types. Return the original type since
50// no instruction is needed for the conversion
51  def : Pat<(v4f64  (bitconvert (v8f32 VR256X:$src))),  (v4f64 VR256X:$src)>;
52  def : Pat<(v4f64  (bitconvert (v8i32 VR256X:$src))),  (v4f64 VR256X:$src)>;
53  def : Pat<(v4f64  (bitconvert (v4i64 VR256X:$src))),  (v4f64 VR256X:$src)>;
54  def : Pat<(v4f64  (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
55  def : Pat<(v4f64  (bitconvert (v32i8 VR256X:$src))),  (v4f64 VR256X:$src)>;
56  def : Pat<(v8f32  (bitconvert (v8i32 VR256X:$src))),  (v8f32 VR256X:$src)>;
57  def : Pat<(v8f32  (bitconvert (v4i64 VR256X:$src))),  (v8f32 VR256X:$src)>;
58  def : Pat<(v8f32  (bitconvert (v4f64 VR256X:$src))),  (v8f32 VR256X:$src)>;
59  def : Pat<(v8f32  (bitconvert (v32i8 VR256X:$src))),  (v8f32 VR256X:$src)>;
60  def : Pat<(v8f32  (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
61  def : Pat<(v4i64  (bitconvert (v8f32 VR256X:$src))),  (v4i64 VR256X:$src)>;
62  def : Pat<(v4i64  (bitconvert (v8i32 VR256X:$src))),  (v4i64 VR256X:$src)>;
63  def : Pat<(v4i64  (bitconvert (v4f64 VR256X:$src))),  (v4i64 VR256X:$src)>;
64  def : Pat<(v4i64  (bitconvert (v32i8 VR256X:$src))),  (v4i64 VR256X:$src)>;
65  def : Pat<(v4i64  (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
66  def : Pat<(v32i8  (bitconvert (v4f64 VR256X:$src))),  (v32i8 VR256X:$src)>;
67  def : Pat<(v32i8  (bitconvert (v4i64 VR256X:$src))),  (v32i8 VR256X:$src)>;
68  def : Pat<(v32i8  (bitconvert (v8f32 VR256X:$src))),  (v32i8 VR256X:$src)>;
69  def : Pat<(v32i8  (bitconvert (v8i32 VR256X:$src))),  (v32i8 VR256X:$src)>;
70  def : Pat<(v32i8  (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
71  def : Pat<(v8i32  (bitconvert (v32i8 VR256X:$src))),  (v8i32 VR256X:$src)>;
72  def : Pat<(v8i32  (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
73  def : Pat<(v8i32  (bitconvert (v8f32 VR256X:$src))),  (v8i32 VR256X:$src)>;
74  def : Pat<(v8i32  (bitconvert (v4i64 VR256X:$src))),  (v8i32 VR256X:$src)>;
75  def : Pat<(v8i32  (bitconvert (v4f64 VR256X:$src))),  (v8i32 VR256X:$src)>;
76  def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))),  (v16i16 VR256X:$src)>;
77  def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))),  (v16i16 VR256X:$src)>;
78  def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))),  (v16i16 VR256X:$src)>;
79  def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))),  (v16i16 VR256X:$src)>;
80  def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))),  (v16i16 VR256X:$src)>;
81}
82
83//
84// AVX-512: the VPXOR instruction writes zero to its upper part, so it is safe to use it to build zeros.
85//
86
87let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
88    isPseudo = 1, Predicates = [HasAVX512] in {
89def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
90               [(set VR512:$dst, (v16f32 immAllZerosV))]>;
91}
92
93def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
94def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
95def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
96def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
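// For example, an all-zeros build_vector of any of the four 512-bit types above
// funnels into the single AVX512_512_SET0 pseudo; the pseudo is expected to be
// expanded later into a self-XOR idiom (per the VPXOR note above), roughly what
// _mm512_setzero_ps() produces, assuming the usual post-RA expansion hook.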
97
98//===----------------------------------------------------------------------===//
99// AVX-512 - VECTOR INSERT
100//
101// -- 32x4 form --
102let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
103def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
104          (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
105          "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
106          []>, EVEX_4V, EVEX_V512;
107let mayLoad = 1 in
108def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
109          (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
110          "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
111          []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
112}
113
114// -- 64x4 fp form --
115let neverHasSideEffects = 1, ExeDomain = SSEPackedDouble in {
116def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
117          (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
118          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
119          []>, EVEX_4V, EVEX_V512, VEX_W;
120let mayLoad = 1 in
121def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
122          (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
123          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
124          []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
125}
126// -- 32x4 integer form --
127let neverHasSideEffects = 1 in {
128def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
129          (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
130          "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
131          []>, EVEX_4V, EVEX_V512;
132let mayLoad = 1 in
133def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
134          (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
135          "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
136          []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
137
138}
139
140let neverHasSideEffects = 1 in {
141// -- 64x4 form --
142def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
143          (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
144          "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
145          []>, EVEX_4V, EVEX_V512, VEX_W;
146let mayLoad = 1 in
147def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
148          (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
149          "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
150          []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
151}
152
153def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
154           (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
155                        (INSERT_get_vinsert128_imm VR512:$ins))>;
156def : Pat<(vinsert128_insert:$ins (v8f64  VR512:$src1), (v2f64 VR128X:$src2),
157           (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
158                        (INSERT_get_vinsert128_imm VR512:$ins))>;
159def : Pat<(vinsert128_insert:$ins (v8i64  VR512:$src1), (v2i64 VR128X:$src2),
160           (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
161                        (INSERT_get_vinsert128_imm VR512:$ins))>;
162def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
163           (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
164                        (INSERT_get_vinsert128_imm VR512:$ins))>;
165			
166def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
167           (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
168                        (INSERT_get_vinsert128_imm VR512:$ins))>;
169def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
170	                (bc_v4i32 (loadv2i64 addr:$src2)),
171           (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
172                        (INSERT_get_vinsert128_imm VR512:$ins))>;
173def : Pat<(vinsert128_insert:$ins (v8f64  VR512:$src1), (loadv2f64 addr:$src2),
174           (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
175                        (INSERT_get_vinsert128_imm VR512:$ins))>;
176def : Pat<(vinsert128_insert:$ins (v8i64  VR512:$src1), (loadv2i64 addr:$src2),
177           (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
178                        (INSERT_get_vinsert128_imm VR512:$ins))>;
179
180def : Pat<(vinsert256_insert:$ins (v16f32  VR512:$src1), (v8f32 VR256X:$src2),
181           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
182                        (INSERT_get_vinsert256_imm VR512:$ins))>;
183def : Pat<(vinsert256_insert:$ins (v8f64  VR512:$src1), (v4f64 VR256X:$src2),
184           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
185                        (INSERT_get_vinsert256_imm VR512:$ins))>;
186def : Pat<(vinsert256_insert:$ins (v8i64  VR512:$src1), (v4i64 VR256X:$src2),
187           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
188                        (INSERT_get_vinsert256_imm VR512:$ins))>;
189def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
190           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
191                        (INSERT_get_vinsert256_imm VR512:$ins))>;
192
193def : Pat<(vinsert256_insert:$ins (v16f32  VR512:$src1), (loadv8f32 addr:$src2),
194           (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
195                        (INSERT_get_vinsert256_imm VR512:$ins))>;
196def : Pat<(vinsert256_insert:$ins (v8f64  VR512:$src1), (loadv4f64 addr:$src2),
197           (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
198                        (INSERT_get_vinsert256_imm VR512:$ins))>;
199def : Pat<(vinsert256_insert:$ins (v8i64  VR512:$src1), (loadv4i64 addr:$src2),
200           (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
201                        (INSERT_get_vinsert256_imm VR512:$ins))>;
202def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
203	                (bc_v8i32 (loadv4i64 addr:$src2)),
204           (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
205                        (INSERT_get_vinsert256_imm VR512:$ins))>;
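// A worked example of the index transform for the 256-bit insert patterns
// above, as a sketch: inserting a v4f64 at element index 4 of a v8f64, i.e.
//   (vinsert256_insert (v8f64 VR512:$src1), (v4f64 VR256X:$src2), (iPTR 4))
// maps through INSERT_get_vinsert256_imm to lane immediate 1, giving AT&T-style
//   vinsertf64x4 $1, %ymm2, %zmm1, %zmm0
// assuming the usual element-index-to-lane division.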
206
207// vinsertps - insert f32 to XMM
208def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
209      (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
210      "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
211      [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
212      EVEX_4V;
213def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
214      (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
215      "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
216      [(set VR128X:$dst, (X86insrtps VR128X:$src1,
217                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
218                          imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
219
220//===----------------------------------------------------------------------===//
221// AVX-512 VECTOR EXTRACT
222//---
223let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
224// -- 32x4 form --
225def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
226          (ins VR512:$src1, i8imm:$src2),
227          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
228          []>, EVEX, EVEX_V512;
229def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
230          (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
231          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
232          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
233
234// -- 64x4 form --
235def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
236          (ins VR512:$src1, i8imm:$src2),
237          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
238          []>, EVEX, EVEX_V512, VEX_W;
239let mayStore = 1 in
240def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
241          (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
242          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
243          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
244}
245
246let neverHasSideEffects = 1 in {
247// -- 32x4 form --
248def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
249          (ins VR512:$src1, i8imm:$src2),
250          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
251          []>, EVEX, EVEX_V512;
252def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
253          (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
254          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
255          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
256
257// -- 64x4 form --
258def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
259          (ins VR512:$src1, i8imm:$src2),
260          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
261          []>, EVEX, EVEX_V512, VEX_W;
262let mayStore = 1 in
263def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
264          (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
265          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
266          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
267}
268
269def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
270          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
271                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
272
273def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
274          (v4i32 (VEXTRACTI32x4rr VR512:$src1,
275                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
276
277def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
278          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
279                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
280
281def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
282          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
283                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
284
285
286def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
287          (v8f32 (VEXTRACTF64x4rr VR512:$src1,
288                  (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
289
290def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
291          (v8i32 (VEXTRACTI64x4rr VR512:$src1,
292                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
293
294def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
295          (v4f64 (VEXTRACTF64x4rr VR512:$src1,
296                  (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
297
298def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
299          (v4i64 (VEXTRACTI64x4rr VR512:$src1,
300                  (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
301
302// A 256-bit subvector extract from the first 512-bit vector position
303// is a subregister copy that needs no instruction.
304def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
305          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
306def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
307          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
308def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
309          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
310def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
311          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
312
313// zmm -> xmm
314def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
315          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
316def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
317          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
318def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
319          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
320def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
321          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
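// In other words, reading the low 256 or 128 bits of a zmm register is just a
// subregister read, so casts such as _mm512_castps512_ps256 and
// _mm512_castps512_ps128 should compile to no instruction (assuming the
// standard cast intrinsic names).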
322
323
324// A 128-bit subvector insert to the first 512-bit vector position
325// is a subregister copy that needs no instruction.
326def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
327          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
328          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
329          sub_ymm)>;
330def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
331          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
332          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
333          sub_ymm)>;
334def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
335          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
336          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
337          sub_ymm)>;
338def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
339          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
340          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
341          sub_ymm)>;
342
343def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
344          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
345def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
346          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
347def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
348          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
349def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
350          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
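// Likewise, widening an xmm/ymm value into the low part of an undefined zmm
// value is modeled as INSERT_SUBREG into IMPLICIT_DEF and costs no instruction;
// at the C level this is roughly _mm512_castps128_ps512 / _mm512_castps256_ps512
// (assuming the standard cast intrinsic names; the upper bits are undefined).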
351
352// vextractps - extract 32 bits from XMM
353def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
354      (ins VR128X:$src1, u32u8imm:$src2),
355      "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
356      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
357      EVEX;
358
359def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
360      (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
361      "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
362      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
363                          addr:$dst)]>, EVEX;
364
365//===---------------------------------------------------------------------===//
366// AVX-512 BROADCAST
367//---
368multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr, 
369                         RegisterClass DestRC,
370                         RegisterClass SrcRC, X86MemOperand x86memop> {
371  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
372         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
373         []>, EVEX;
374  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
375        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),[]>, EVEX;
376}
377let ExeDomain = SSEPackedSingle in {
378  defm VBROADCASTSSZ  : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512, 
379                                       VR128X, f32mem>,
380                                       EVEX_V512, EVEX_CD8<32, CD8VT1>;
381}
382
383let ExeDomain = SSEPackedDouble in {
384  defm VBROADCASTSDZ  : avx512_fp_broadcast<0x19, "vbroadcastsd{z}", VR512,
385                                       VR128X, f64mem>,
386                                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
387}
388
389def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
390          (VBROADCASTSSZrm addr:$src)>;
391def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
392          (VBROADCASTSDZrm addr:$src)>;
393
394def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
395          (VBROADCASTSSZrm addr:$src)>;
396def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
397          (VBROADCASTSDZrm addr:$src)>;
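// Usage sketch: a splat of a loaded scalar such as _mm512_set1_ps(*p) is
// expected to match the loadf32 pattern above and fold the load, e.g. something
// like
//   vbroadcastss (%rdi), %zmm0
// (modulo the {z} marker carried in the asm strings above), with the
// EVEX_CD8<32, CD8VT1> tuple scaling any 8-bit displacement by 4 bytes.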
398
399multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
400                          RegisterClass SrcRC, RegisterClass KRC> {
401  def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
402                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
403                   []>, EVEX, EVEX_V512;
404  def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), 
405                   (ins KRC:$mask, SrcRC:$src),
406                   !strconcat(OpcodeStr, 
407                        "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
408                   []>, EVEX, EVEX_V512, EVEX_KZ;
409}
410
411defm VPBROADCASTDr  : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
412defm VPBROADCASTQr  : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
413                                            VEX_W;
414                                            
415def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
416           (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
417
418def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
419           (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
420
421def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
422        (VPBROADCASTDrZrr GR32:$src)>;
423def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
424        (VPBROADCASTQrZrr GR64:$src)>;
425def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
426        (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
427
428def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
429        (VPBROADCASTDrZrr GR32:$src)>;
430def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
431        (VPBROADCASTQrZrr GR64:$src)>;
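// Sketch of what these select to: splatting a GPR, e.g.
// (v16i32 (X86VBroadcast (i32 GR32:$src))), becomes a single register-source
// broadcast such as
//   vpbroadcastd %eax, %zmm0
// which is what _mm512_set1_epi32(x) is expected to produce when x is already
// in a register (assuming the standard intrinsic name).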
432
433multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
434                          X86MemOperand x86memop, PatFrag ld_frag,
435                          RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
436                          RegisterClass KRC> {
437  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
438                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
439                  [(set DstRC:$dst,
440                    (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
441  def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
442                                                         VR128X:$src),
443                    !strconcat(OpcodeStr, 
444                    "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
445                    [(set DstRC:$dst,
446                      (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
447                    EVEX, EVEX_KZ;
448  let mayLoad = 1 in {
449  def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
450                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
451                  [(set DstRC:$dst, 
452                    (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
453  def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
454                                                         x86memop:$src),
455                  !strconcat(OpcodeStr, 
456                      "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
457                  [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask, 
458                                     (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
459  }
460}
461
462defm VPBROADCASTDZ  : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
463                      loadi32, VR512, v16i32, v4i32, VK16WM>,
464                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
465defm VPBROADCASTQZ  : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
466                      loadi64, VR512, v8i64, v2i64, VK8WM>,  EVEX_V512, VEX_W,
467                      EVEX_CD8<64, CD8VT1>;
468
469def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
470          (VPBROADCASTDZrr VR128X:$src)>;
471def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
472          (VPBROADCASTQZrr VR128X:$src)>;
473
474def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
475          (VBROADCASTSSZrr VR128X:$src)>;
476def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
477          (VBROADCASTSDZrr VR128X:$src)>;
478
479def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
480          (VBROADCASTSSZrr VR128X:$src)>;
481def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
482          (VBROADCASTSDZrr VR128X:$src)>;
483    
484// Provide a fallback in case the load node used in the patterns above has
485// additional users, which prevents the load-folding patterns from being selected.
486def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
487          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
488def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
489          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
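// Sketch of when the fallback fires (pseudo-IR):
//   %f = load float, float* %p
//   call void @use(float %f)          ; extra use blocks load folding
//   %v = splat %f                     ; broadcast from a register instead,
//                                     ; selected via VBROADCASTSSZrr above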
490
491
492let Predicates = [HasAVX512] in {
493def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
494           (EXTRACT_SUBREG 
495              (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
496                       addr:$src)), sub_ymm)>;
497}
498//===----------------------------------------------------------------------===//
499// AVX-512 BROADCAST MASK TO VECTOR REGISTER
500//---
501
502multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
503                       RegisterClass DstRC, RegisterClass KRC,
504                       ValueType OpVT, ValueType SrcVT> {
505def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
506                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
507                  []>, EVEX;
508}
509
510defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
511                                             VK16, v16i32, v16i1>, EVEX_V512;
512defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
513                                            VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
514
515//===----------------------------------------------------------------------===//
516// AVX-512 - VPERM
517//
518// -- immediate form --
519multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
520                         SDNode OpNode, PatFrag mem_frag, 
521                         X86MemOperand x86memop, ValueType OpVT> {
522  def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
523                     (ins RC:$src1, i8imm:$src2),
524                     !strconcat(OpcodeStr,
525                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
526                     [(set RC:$dst,
527                       (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
528                     EVEX;
529  def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
530                     (ins x86memop:$src1, i8imm:$src2),
531                     !strconcat(OpcodeStr,
532                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
533                     [(set RC:$dst,
534                       (OpVT (OpNode (mem_frag addr:$src1),
535                              (i8 imm:$src2))))]>, EVEX;
536}
537
538defm VPERMQZ  : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
539                        i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
540let ExeDomain = SSEPackedDouble in 
541defm VPERMPDZ  : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64, 
542                        f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
543
544// -- VPERM - register form --
545multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC, 
546                     PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
547
548  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
549                   (ins RC:$src1, RC:$src2),
550                   !strconcat(OpcodeStr,
551                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
552                   [(set RC:$dst,
553                     (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
554
555  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
556                   (ins RC:$src1, x86memop:$src2),
557                   !strconcat(OpcodeStr,
558                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
559                   [(set RC:$dst,
560                     (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
561                     EVEX_4V;
562}
563
564defm VPERMDZ   : avx512_perm<0x36, "vpermd",  VR512,  memopv16i32, i512mem,
565                           v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
566defm VPERMQZ   : avx512_perm<0x36, "vpermq",  VR512,  memopv8i64,  i512mem, 
567                           v8i64>,  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
568let ExeDomain = SSEPackedSingle in
569defm VPERMPSZ  : avx512_perm<0x16, "vpermps", VR512,  memopv16f32, f512mem,
570                           v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
571let ExeDomain = SSEPackedDouble in
572defm VPERMPDZ  : avx512_perm<0x16, "vpermpd", VR512,  memopv8f64, f512mem, 
573                           v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
574
575// -- VPERM2I - 3 source operands form --
576multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
577                          PatFrag mem_frag, X86MemOperand x86memop,
578                          ValueType OpVT> {
579let Constraints = "$src1 = $dst" in {
580  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
581                   (ins RC:$src1, RC:$src2, RC:$src3),
582                   !strconcat(OpcodeStr,
583                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
584                   [(set RC:$dst,
585                     (OpVT (X86VPermv3 RC:$src1, RC:$src2, RC:$src3)))]>,
586                    EVEX_4V;
587
588  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
589                   (ins RC:$src1, RC:$src2, x86memop:$src3),
590                   !strconcat(OpcodeStr,
591                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
592                   [(set RC:$dst,
593                     (OpVT (X86VPermv3 RC:$src1, RC:$src2, 
594                      (mem_frag addr:$src3))))]>, EVEX_4V;
595  }
596}
597defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d",  VR512, memopv16i32, i512mem, 
598                               v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
599defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q",  VR512, memopv8i64, i512mem, 
600                               v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
601defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps",  VR512, memopv16f32, i512mem, 
602                               v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
603defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd",  VR512, memopv8f64, i512mem, 
604                               v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
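// A sketch of the operand roles, as implied by the "$src1 = $dst" constraint
// above: $src1 holds the element indices and is overwritten with the result,
// which is gathered from the two-register table formed by $src2 and $src3, e.g.
//   vpermi2ps %zmm3, %zmm2, %zmm1     ; zmm1 = table{zmm2,zmm3}[zmm1 indices]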
605
606//===----------------------------------------------------------------------===//
607// AVX-512 - BLEND using mask
608//
609multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, Intrinsic Int, 
610                          RegisterClass KRC, RegisterClass RC,
611                          X86MemOperand x86memop, PatFrag mem_frag,
612                          SDNode OpNode, ValueType vt> {
613  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
614               (ins KRC:$mask, RC:$src1, RC:$src2),
615               !strconcat(OpcodeStr,
616                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
617               [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2), 
618                 (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
619  def rr_Int : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
620               (ins KRC:$mask, RC:$src1, RC:$src2),
621               !strconcat(OpcodeStr,
622                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
623               [(set RC:$dst, (Int KRC:$mask, (vt RC:$src2),
624                 (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
625
626  let mayLoad = 1 in {
627    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
628                 (ins KRC:$mask, RC:$src1, x86memop:$src2),
629                 !strconcat(OpcodeStr,
630                  "\t{$src2, $src1, $mask, $dst|$dst, $mask, $src1, $src2}"),
631                 []>, 
632                 EVEX_4V, EVEX_K;
633
634    def rm_Int : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
635                 (ins KRC:$mask, RC:$src1, x86memop:$src2),
636                 !strconcat(OpcodeStr,
637                  "\t{$src2, $src1, $mask, $dst|$dst, $mask, $src1, $src2}"),
638                 [(set RC:$dst, (Int KRC:$mask, (vt RC:$src1),
639                   (mem_frag addr:$src2)))]>,
640                 EVEX_4V, EVEX_K;
641  }
642}
643
644let ExeDomain = SSEPackedSingle in
645defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", 
646                              int_x86_avx512_mskblend_ps_512,
647                              VK16WM, VR512, f512mem,
648                              memopv16f32, vselect, v16f32>, 
649                              EVEX_CD8<32, CD8VF>, EVEX_V512;
650let ExeDomain = SSEPackedDouble in
651defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", 
652                              int_x86_avx512_mskblend_pd_512,
653                              VK8WM, VR512, f512mem,
654                              memopv8f64, vselect, v8f64>, 
655                              VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
656
657defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", 
658                              int_x86_avx512_mskblend_d_512,
659                              VK16WM, VR512, f512mem, 
660                              memopv16i32, vselect, v16i32>, 
661                              EVEX_CD8<32, CD8VF>, EVEX_V512;
662
663defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", 
664                              int_x86_avx512_mskblend_q_512, 
665                              VK8WM, VR512, f512mem, 
666                              memopv8i64, vselect, v8i64>, 
667                              VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
668
669let Predicates = [HasAVX512] in {
670def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
671                            (v8f32 VR256X:$src2))),
672            (EXTRACT_SUBREG 
673              (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), 
674            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
675            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
676
677def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
678                            (v8i32 VR256X:$src2))),
679            (EXTRACT_SUBREG 
680                (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), 
681            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
682            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
683}
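// Illustrative semantics, following the rr pattern above
// (vselect $mask, $src2, $src1): each result element comes from $src2 where the
// mask bit is set and from $src1 otherwise. That is what
// _mm512_mask_blend_ps(k, a, b) is expected to expose, assuming its usual
// definition (b where k is set, a elsewhere).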
684
685multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC, 
686              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, 
687              SDNode OpNode, ValueType vt> {
688  def rr : AVX512BI<opc, MRMSrcReg,
689             (outs KRC:$dst), (ins RC:$src1, RC:$src2), 
690             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
691             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))], 
692             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
693  def rm : AVX512BI<opc, MRMSrcMem,
694             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2), 
695             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
696             [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2)))],
697             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
698}
699
700defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, 
701                           memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512;
702defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem, 
703                           memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W;
704
705defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem, 
706                           memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512;
707defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem, 
708                           memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W;
709
710def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
711            (COPY_TO_REGCLASS (VPCMPGTDZrr 
712            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
713            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
714
715def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
716            (COPY_TO_REGCLASS (VPCMPEQDZrr 
717            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
718            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
719
720multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
721              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, 
722              SDNode OpNode, ValueType vt, Operand CC, string asm,
723              string asm_alt> {
724  def rri : AVX512AIi8<opc, MRMSrcReg,
725             (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
726             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], 
727             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
728  def rmi : AVX512AIi8<opc, MRMSrcMem,
729             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
730             [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
731                              imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
732  // Accept explicit immediate argument form instead of comparison code.
733  let neverHasSideEffects = 1 in {
734    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
735               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
736               asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
737    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
738               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
739               asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
740  }
741}
742
743defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
744                              X86cmpm, v16i32, AVXCC,
745              "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
746              "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
747              EVEX_V512, EVEX_CD8<32, CD8VF>;
748defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
749                               X86cmpmu, v16i32, AVXCC,
750              "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
751              "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
752              EVEX_V512, EVEX_CD8<32, CD8VF>;
753
754defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
755                              X86cmpm, v8i64, AVXCC,
756              "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
757              "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
758              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
759defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
760                               X86cmpmu, v8i64, AVXCC,
761              "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
762              "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
763              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
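// The two asm forms accepted above, as a sketch: the pattern form folds the
// condition into the mnemonic, e.g.
//   vpcmpled %zmm1, %zmm0, %k0
// while the *_alt form takes the predicate as an explicit immediate, e.g.
//   vpcmpd $2, %zmm1, %zmm0, %k0
// assuming LE encodes as predicate value 2.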
764
765// avx512_cmp_packed - AVX-512 compare packed instructions
766multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
767                           X86MemOperand x86memop, Operand CC,
768                           SDNode OpNode, ValueType vt, string asm,
769                           string asm_alt, Domain d> {
770  def rri : AVX512PIi8<0xC2, MRMSrcReg,
771             (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
772             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
773  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
774             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
775             [(set KRC:$dst,
776              (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
777
778  // Accept explicit immediate argument form instead of comparison code.
779  let neverHasSideEffects = 1 in {
780    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
781               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
782               asm_alt, [], d>;
783    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
784               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
785               asm_alt, [], d>;
786  }
787}
788
789defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, AVXCC, X86cmpm, v16f32,
790               "vcmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
791               "vcmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
792               SSEPackedSingle>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
793defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, AVXCC, X86cmpm, v8f64,
794               "vcmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
795               "vcmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
796               SSEPackedDouble>, OpSize, EVEX_4V, VEX_W, EVEX_V512,
797               EVEX_CD8<64, CD8VF>;
798
799def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
800          (COPY_TO_REGCLASS (VCMPPSZrri
801            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
802            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
803            imm:$cc), VK8)>;
804def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
805          (COPY_TO_REGCLASS (VPCMPDZrri
806            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
807            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
808            imm:$cc), VK8)>;
809def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
810          (COPY_TO_REGCLASS (VPCMPUDZrri
811            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
812            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
813            imm:$cc), VK8)>;
814               
815// Mask register copy, including
816// - copy between mask registers
817// - load/store mask registers
818// - copy from GPR to mask register and vice versa
819//
820multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
821                         string OpcodeStr, RegisterClass KRC,
822                         ValueType vt, X86MemOperand x86memop> {
823  let neverHasSideEffects = 1 in {
824    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
825               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
826    let mayLoad = 1 in
827    def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
828               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
829               [(set KRC:$dst, (vt (load addr:$src)))]>;
830    let mayStore = 1 in
831    def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
832               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
833  }
834}
835
836multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
837                             string OpcodeStr,
838                             RegisterClass KRC, RegisterClass GRC> {
839  let neverHasSideEffects = 1 in {
840    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
841               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
842    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
843               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
844  }
845}
846
847let Predicates = [HasAVX512] in {
848  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
849               VEX, TB;
850  defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
851               VEX, TB;
852}
853
854let Predicates = [HasAVX512] in {
855  // GR16 from/to 16-bit mask
856  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
857            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
858  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
859            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
860
861  // Store kreg in memory
862  def : Pat<(store (v16i1 VK16:$src), addr:$dst),
863            (KMOVWmk addr:$dst, VK16:$src)>;
864
865  def : Pat<(store (v8i1 VK8:$src), addr:$dst),
866            (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>;
867}
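// Example round trip through the patterns above, as a sketch: bitconverting an
// i16 in a GPR to a v16i1 mask and back selects to
//   kmovw %edi, %k1       ; the GR16 source is first widened to GR32 (SUBREG_TO_REG)
//   kmovw %k1, %eax       ; then EXTRACT_SUBREG narrows the result back to GR16
// so mask values round-trip through the 32-bit kmovw forms.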
868// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
869let Predicates = [HasAVX512] in {
870  // GR from/to 8-bit mask without native support
871  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
872            (COPY_TO_REGCLASS
873              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
874              VK8)>;
875  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
876            (EXTRACT_SUBREG
877              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
878              sub_8bit)>;
879}
880
881// Mask unary operation
882// - KNOT
883multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
884                         RegisterClass KRC, SDPatternOperator OpNode> {
885  let Predicates = [HasAVX512] in
886    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
887               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
888               [(set KRC:$dst, (OpNode KRC:$src))]>;
889}
890
891multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
892                               SDPatternOperator OpNode> {
893  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
894                          VEX, TB;
895}
896
897defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
898
899def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
900def : Pat<(xor VK8:$src1,  (v8i1 immAllOnesV)),
901          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
902
903// With AVX-512, 8-bit mask is promoted to 16-bit mask.
904def : Pat<(not VK8:$src),
905          (COPY_TO_REGCLASS
906            (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
907
908// Mask binary operation
909// - KADD, KAND, KANDN, KOR, KXNOR, KXOR
910multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
911                           RegisterClass KRC, SDPatternOperator OpNode> {
912  let Predicates = [HasAVX512] in
913    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
914               !strconcat(OpcodeStr,
915                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
916               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
917}
918
919multiclass avx512_mask_binop_w<bits<8> opc, string OpcodeStr,
920                             SDPatternOperator OpNode> {
921  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
922                           VEX_4V, VEX_L, TB;
923}
924
925def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
926def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
927
928let isCommutable = 1 in {
929  defm KADD  : avx512_mask_binop_w<0x4a, "kadd",  add>;
930  defm KAND  : avx512_mask_binop_w<0x41, "kand",  and>;
931  let isCommutable = 0 in
932  defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
933  defm KOR   : avx512_mask_binop_w<0x45, "kor",   or>;
934  defm KXNOR : avx512_mask_binop_w<0x46, "kxnor", xnor>;
935  defm KXOR  : avx512_mask_binop_w<0x47, "kxor",  xor>;
936}
937
938multiclass avx512_mask_binop_int<string IntName, string InstName> {
939  let Predicates = [HasAVX512] in
940    def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1")
941                VK16:$src1, VK16:$src2),
942              (!cast<Instruction>(InstName##"Wrr") VK16:$src1, VK16:$src2)>;
943}
944
945defm : avx512_mask_binop_int<"kadd",  "KADD">;
946defm : avx512_mask_binop_int<"kand",  "KAND">;
947defm : avx512_mask_binop_int<"kandn", "KANDN">;
948defm : avx512_mask_binop_int<"kor",   "KOR">;
949defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
950defm : avx512_mask_binop_int<"kxor",  "KXOR">;
951// With AVX-512, 8-bit mask is promoted to 16-bit mask.
952multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
953  let Predicates = [HasAVX512] in
954    def : Pat<(OpNode VK8:$src1, VK8:$src2),
955              (COPY_TO_REGCLASS
956                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
957                      (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
958}
959
960defm : avx512_binop_pat<and,  KANDWrr>;
961defm : avx512_binop_pat<andn, KANDNWrr>;
962defm : avx512_binop_pat<or,   KORWrr>;
963defm : avx512_binop_pat<xnor, KXNORWrr>;
964defm : avx512_binop_pat<xor,  KXORWrr>;
965
966// Mask unpacking
967multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
968                           RegisterClass KRC1, RegisterClass KRC2> {
969  let Predicates = [HasAVX512] in
970    def rr : I<opc, MRMSrcReg, (outs KRC1:$dst), (ins KRC2:$src1, KRC2:$src2),
971               !strconcat(OpcodeStr,
972                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
973}
974
975multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
976  defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16, VK8>,
977                            VEX_4V, VEX_L, OpSize, TB;
978}
979
980defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
981
982multiclass avx512_mask_unpck_int<string IntName, string InstName> {
983  let Predicates = [HasAVX512] in
984    def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1")
985                VK8:$src1, VK8:$src2),
986              (!cast<Instruction>(InstName##"BWrr") VK8:$src1, VK8:$src2)>;
987}
988
989defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
990// Mask bit testing
991multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
992                            SDNode OpNode> {
993  let Predicates = [HasAVX512], Defs = [EFLAGS] in
994    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
995               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
996               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
997}
998
999multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
1000  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1001                            VEX, TB;
1002}
1003
1004defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
1005defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest>;
1006
1007// Mask shift
1008multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1009                             SDNode OpNode> {
1010  let Predicates = [HasAVX512] in
1011    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
1012                 !strconcat(OpcodeStr,
1013                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
1014                            [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
1015}
1016
1017multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
1018                               SDNode OpNode> {
1019  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1020                             VEX, OpSize, TA, VEX_W;
1021}
1022
1023defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>;
1024defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>;
1025
1026// Mask setting all 0s or 1s
1027multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
1028  let Predicates = [HasAVX512] in
1029    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
1030      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
1031                     [(set KRC:$dst, (VT Val))]>;
1032}
1033
1034multiclass avx512_mask_setop_w<PatFrag Val> {
1035  defm B : avx512_mask_setop<VK8,  v8i1, Val>;
1036  defm W : avx512_mask_setop<VK16, v16i1, Val>;
1037}
1038
1039defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
1040defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1041
1042// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1043let Predicates = [HasAVX512] in {
1044  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
1045  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
1046}
1047def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
1048          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
1049
1050def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
1051          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
1052
1053def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
1054          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
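// For example, the high half of a 16-bit mask is extracted with a shift, e.g.
//   kshiftrw $8, %k1, %k2
// followed by a plain register-class copy down to VK8, per the pattern above.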
1055
1056//===----------------------------------------------------------------------===//
1057// AVX-512 - Aligned and unaligned load and store
1058//
1059
1060multiclass avx512_mov_packed<bits<8> opc, RegisterClass RC, RegisterClass KRC,
1061                            X86MemOperand x86memop, PatFrag ld_frag, 
1062                            string asm, Domain d> {
1063let neverHasSideEffects = 1 in
1064  def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
1065              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
1066              EVEX;
1067let canFoldAsLoad = 1 in
1068  def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
1069              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1070               [(set RC:$dst, (ld_frag addr:$src))], d>, EVEX;
1071let Constraints = "$src1 = $dst" in {
1072  def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), 
1073                                     (ins RC:$src1, KRC:$mask, RC:$src2),
1074              !strconcat(asm, 
1075              "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
1076              EVEX, EVEX_K;
1077  def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1078                                (ins RC:$src1, KRC:$mask, x86memop:$src2),
1079              !strconcat(asm, 
1080              "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
1081               [], d>, EVEX, EVEX_K;
1082}
1083}
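// Each instantiation of avx512_mov_packed below yields an unmasked register
// form (<Name>rr), a load form (<Name>rm), and merge-masked forms (<Name>rrk,
// <Name>rmk) whose destination is tied to $src1.  As a sketch of the intended
// AT&T syntax for the masked register form (assumed, not taken from a test):
//   vmovaps %zmm1, %zmm0 {%k1}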
1084
1085defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
1086                              "vmovaps", SSEPackedSingle>,
1087                               EVEX_V512, EVEX_CD8<32, CD8VF>;
1088defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
1089                              "vmovapd", SSEPackedDouble>,
1090                              OpSize, EVEX_V512, VEX_W,
1091                              EVEX_CD8<64, CD8VF>;
1092defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
1093                              "vmovups", SSEPackedSingle>,
1094                              EVEX_V512, EVEX_CD8<32, CD8VF>;
1095defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
1096                              "vmovupd", SSEPackedDouble>,
1097                               OpSize, EVEX_V512, VEX_W,
1098                               EVEX_CD8<64, CD8VF>;
1099def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
1100                    "vmovaps\t{$src, $dst|$dst, $src}",
1101                    [(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
1102                    SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
1103def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
1104                    "vmovapd\t{$src, $dst|$dst, $src}",
1105                    [(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
1106                    SSEPackedDouble>, EVEX, EVEX_V512,
1107                    OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
1108def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
1109                    "vmovups\t{$src, $dst|$dst, $src}",
1110                    [(store (v16f32 VR512:$src), addr:$dst)],
1111                    SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
1112def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
1113                    "vmovupd\t{$src, $dst|$dst, $src}",
1114                    [(store (v8f64 VR512:$src), addr:$dst)],
1115                    SSEPackedDouble>, EVEX, EVEX_V512,
1116                    OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
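// Note on EVEX_CD8<size, CD8VF>: it records the element size and tuple used
// for AVX-512 compressed disp8*N memory displacements.  For a full 512-bit
// access N is 64 bytes, so, as a rough example, the displacement in
//   vmovaps 64(%rax), %zmm0
// can be encoded as a single disp8 byte of 1 scaled by N = 64.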
1117
1118let neverHasSideEffects = 1 in {
1119  def VMOVDQA32rr  : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
1120                             (ins VR512:$src),
1121                             "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
1122                             EVEX, EVEX_V512;
1123  def VMOVDQA64rr  : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
1124                             (ins VR512:$src),
1125                             "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
1126                             EVEX, EVEX_V512, VEX_W;
1127let mayStore = 1 in {
1128  def VMOVDQA32mr  : AVX512BI<0x7F, MRMDestMem, (outs),
1129                     (ins i512mem:$dst, VR512:$src),
1130                     "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
1131                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
1132  def VMOVDQA64mr  : AVX512BI<0x7F, MRMDestMem, (outs),
1133                     (ins i512mem:$dst, VR512:$src),
1134                     "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
1135                     EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1136}
1137let mayLoad = 1 in {
1138def VMOVDQA32rm  : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), 
1139                           (ins i512mem:$src),
1140                           "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
1141                           EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
1142def VMOVDQA64rm  : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
1143                           (ins i512mem:$src),
1144                           "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
1145                           EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1146}
1147}
1148
1149// 512-bit aligned load/store
1150def : Pat<(alignedloadv8i64 addr:$src),  (VMOVDQA64rm addr:$src)>;
1151def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>;
1152
1153def : Pat<(alignedstore512 (v8i64  VR512:$src), addr:$dst),
1154          (VMOVDQA64mr addr:$dst, VR512:$src)>;
1155def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
1156          (VMOVDQA32mr addr:$dst, VR512:$src)>;
1157
1158multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm,
1159                          RegisterClass RC, RegisterClass KRC,
1160                          PatFrag ld_frag, X86MemOperand x86memop> {
1161let neverHasSideEffects = 1 in
1162  def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
1163                     !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
1164let canFoldAsLoad = 1 in
1165  def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
1166                     !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1167                     [(set RC:$dst, (ld_frag addr:$src))]>, EVEX;
1168let mayStore = 1 in
1169  def mr : AVX512XSI<store_opc, MRMDestMem, (outs),
1170                     (ins x86memop:$dst, VR512:$src),
1171                     !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
1172let Constraints = "$src1 = $dst" in {
1173  def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
1174                                      (ins RC:$src1, KRC:$mask, RC:$src2),
1175              !strconcat(asm, 
1176              "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>,
1177              EVEX, EVEX_K;
1178  def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst),
1179                                  (ins RC:$src1, KRC:$mask, x86memop:$src2),
1180              !strconcat(asm, 
1181              "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
1182               []>, EVEX, EVEX_K;
1183}
1184}
1185
1186defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
1187                                memopv16i32, i512mem>,
1188                                EVEX_V512, EVEX_CD8<32, CD8VF>;
1189defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM,
1190                                memopv8i64, i512mem>,
1191                                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1192
1193// 512-bit unaligned load/store
1194def : Pat<(loadv8i64 addr:$src),         (VMOVDQU64rm addr:$src)>;
1195def : Pat<(loadv16i32 addr:$src),        (VMOVDQU32rm addr:$src)>;
1196
1197def : Pat<(store (v8i64  VR512:$src), addr:$dst),
1198          (VMOVDQU64mr addr:$dst, VR512:$src)>;
1199def : Pat<(store (v16i32 VR512:$src), addr:$dst),
1200          (VMOVDQU32mr addr:$dst, VR512:$src)>;
1201
1202let AddedComplexity = 20 in {
1203def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), 
1204                           (v16f32 VR512:$src2))),
1205                  (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
1206def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1), 
1207                           (v8f64 VR512:$src2))),
1208                  (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
1209def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1), 
1210                           (v16i32 VR512:$src2))),
1211                  (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
1212def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1), 
1213                           (v8i64 VR512:$src2))),
1214                  (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
1215}
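// Illustrative use (assumption, not from a test in this tree): an IR select
// such as
//   select <16 x i1> %mask, <16 x float> %a, <16 x float> %b
// is matched by the patterns above into the merge-masked move VMOVUPSZrrk,
// with %b placed in the tied destination and %a written over it under %mask,
// i.e. roughly "vmovups %zmm_a, %zmm_dst {%k_mask}".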
1216// Move Doubleword Int to Packed Doubleword Int
1217//
1218def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
1219                      "vmovd{z}\t{$src, $dst|$dst, $src}",
1220                      [(set VR128X:$dst,
1221                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
1222                        EVEX, VEX_LIG;
1223def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
1224                      "vmovd{z}\t{$src, $dst|$dst, $src}",
1225                      [(set VR128X:$dst,
1226                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
1227                        IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1228def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
1229                      "vmovq{z}\t{$src, $dst|$dst, $src}",
1230                        [(set VR128X:$dst,
1231                          (v2i64 (scalar_to_vector GR64:$src)))],
1232                          IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
1233let isCodeGenOnly = 1 in {
1234def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
1235                       "vmovq{z}\t{$src, $dst|$dst, $src}",
1236                       [(set FR64:$dst, (bitconvert GR64:$src))],
1237                       IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
1238def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
1239                         "vmovq{z}\t{$src, $dst|$dst, $src}",
1240                         [(set GR64:$dst, (bitconvert FR64:$src))],
1241                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
1242}
1243def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
1244                         "vmovq{z}\t{$src, $dst|$dst, $src}",
1245                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
1246                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
1247                         EVEX_CD8<64, CD8VT1>;
1248
1249// Move Doubleword Int to Scalar Single-Precision FP
1250//
1251let isCodeGenOnly = 1 in {
1252def VMOVDI2SSZrr  : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
1253                      "vmovd{z}\t{$src, $dst|$dst, $src}",
1254                      [(set FR32X:$dst, (bitconvert GR32:$src))],
1255                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
1256
1257def VMOVDI2SSZrm  : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
1258                      "vmovd{z}\t{$src, $dst|$dst, $src}",
1259                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
1260                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1261}
1262
1263// Move low element of Packed Doubleword Int to Doubleword Int
1264//
1265def VMOVPDI2DIZrr  : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
1266                       "vmovd{z}\t{$src, $dst|$dst, $src}",
1267                       [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
1268                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
1269                       EVEX, VEX_LIG;
1270def VMOVPDI2DIZmr  : AVX512SI<0x7E, MRMDestMem, (outs),
1271                       (ins i32mem:$dst, VR128X:$src),
1272                       "vmovd{z}\t{$src, $dst|$dst, $src}",
1273                       [(store (i32 (vector_extract (v4i32 VR128X:$src),
1274                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
1275                       EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1276
1277// Move Packed Quadword Int first element to Quadword Int
1278//
1279def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
1280                      "vmovq{z}\t{$src, $dst|$dst, $src}",
1281                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
1282                                                   (iPTR 0)))],
1283                      IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W,
1284                      Requires<[HasAVX512, In64BitMode]>;
1285
1286def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
1287                       (ins i64mem:$dst, VR128X:$src),
1288                       "vmovq{z}\t{$src, $dst|$dst, $src}",
1289                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
1290                               addr:$dst)], IIC_SSE_MOVDQ>,
1291                       EVEX, OpSize, VEX_LIG, VEX_W, TB, EVEX_CD8<64, CD8VT1>,
1292                       Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
1293
1294// Move Scalar Single-Precision FP to Doubleword Int
1295//
1296let isCodeGenOnly = 1 in {
1297def VMOVSS2DIZrr  : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst),
1298                      (ins FR32X:$src),
1299                      "vmovd{z}\t{$src, $dst|$dst, $src}",
1300                      [(set GR32:$dst, (bitconvert FR32X:$src))],
1301                      IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
1302def VMOVSS2DIZmr  : AVX512SI<0x7E, MRMDestMem, (outs),
1303                      (ins i32mem:$dst, FR32X:$src),
1304                      "vmovd{z}\t{$src, $dst|$dst, $src}",
1305                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
1306                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1307}
1308
1309// Move Quadword Int to Packed Quadword Int
1310//
1311def VMOVQI2PQIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst),
1312                      (ins i64mem:$src),
1313                      "vmovq{z}\t{$src, $dst|$dst, $src}",
1314                      [(set VR128X:$dst,
1315                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
1316                      EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
1317
1318//===----------------------------------------------------------------------===//
1319// AVX-512  MOVSS, MOVSD
1320//===----------------------------------------------------------------------===//
1321
1322multiclass avx512_move_scalar <string asm, RegisterClass RC, 
1323                              SDNode OpNode, ValueType vt,
1324                              X86MemOperand x86memop, PatFrag mem_pat> {
1325  def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), 
1326              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1327              [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
1328                                      (scalar_to_vector RC:$src2))))],
1329              IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
1330  def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
1331              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1332              [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
1333              EVEX, VEX_LIG;
1334  def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
1335             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1336             [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
1337             EVEX, VEX_LIG;
1338}
1339
1340let ExeDomain = SSEPackedSingle in
1341defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem,
1342                                 loadf32>, XS, EVEX_CD8<32, CD8VT1>;
1343
1344let ExeDomain = SSEPackedDouble in
1345defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem,
1346                                 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
1347
1348
1349// For the disassembler
1350let isCodeGenOnly = 1 in {
1351  def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
1352                        (ins VR128X:$src1, FR32X:$src2),
1353                        "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
1354                        IIC_SSE_MOV_S_RR>,
1355                        XS, EVEX_4V, VEX_LIG;
1356  def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
1357                        (ins VR128X:$src1, FR64X:$src2),
1358                        "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
1359                        IIC_SSE_MOV_S_RR>,
1360                        XD, EVEX_4V, VEX_LIG, VEX_W;
1361}
1362
1363let Predicates = [HasAVX512] in {
1364  let AddedComplexity = 15 in {
1365  // Move a scalar into an XMM register zero-extended: zero a VR128X, then
1366  // do a MOVS{S,D} into the lower bits.
1367  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
1368            (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
1369  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
1370            (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
1371  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
1372            (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
1373  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
1374            (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
1375
1376  // Move low f32 and clear high bits.
1377  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
1378            (SUBREG_TO_REG (i32 0),
1379             (VMOVSSZrr (v4f32 (V_SET0)), 
1380              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
1381  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
1382            (SUBREG_TO_REG (i32 0),
1383             (VMOVSSZrr (v4i32 (V_SET0)),
1384                       (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
1385  }
1386
1387  let AddedComplexity = 20 in {
1388  // MOVSSrm zeros the high parts of the register; the patterns below model
1389  // this with COPY_TO_REGCLASS. The AVX versions also write: DST[255:128] <- 0
1390  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
1391            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
1392  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
1393            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
1394  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
1395            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
1396
1397  // MOVSDrm zeros the high parts of the register; the patterns below model
1398  // this with COPY_TO_REGCLASS. The AVX versions also write: DST[255:128] <- 0
1399  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
1400            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
1401  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
1402            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
1403  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
1404            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
1405  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
1406            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
1407  def : Pat<(v2f64 (X86vzload addr:$src)),
1408            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
1409
1410  // Represent the same patterns above but in the form they appear for
1411  // 256-bit types
1412  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
1413                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
1414            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
1415  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
1416                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
1417            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
1418  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
1419                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
1420            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
1421  }
1422  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
1423                   (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
1424            (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
1425                                            FR32X:$src)), sub_xmm)>;
1426  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
1427                   (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
1428            (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
1429                                     FR64X:$src)), sub_xmm)>;
1430  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
1431                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
1432            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
1433
1434  // Move low f64 and clear high bits.
1435  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
1436            (SUBREG_TO_REG (i32 0),
1437             (VMOVSDZrr (v2f64 (V_SET0)),
1438                       (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
1439
1440  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
1441            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
1442                       (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
1443
1444  // Extract and store.
1445  def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
1446                   addr:$dst),
1447            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
1448  def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
1449                   addr:$dst),
1450            (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
1451
1452  // Shuffle with VMOVSS
1453  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
1454            (VMOVSSZrr (v4i32 VR128X:$src1),
1455                      (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
1456  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
1457            (VMOVSSZrr (v4f32 VR128X:$src1),
1458                      (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
1459
1460  // 256-bit variants
1461  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
1462            (SUBREG_TO_REG (i32 0),
1463              (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
1464                        (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
1465              sub_xmm)>;
1466  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
1467            (SUBREG_TO_REG (i32 0),
1468              (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
1469                        (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
1470              sub_xmm)>;
1471
1472  // Shuffle with VMOVSD
1473  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
1474            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1475  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
1476            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1477  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
1478            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1479  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
1480            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1481
1482  // 256-bit variants
1483  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
1484            (SUBREG_TO_REG (i32 0),
1485              (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
1486                        (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
1487              sub_xmm)>;
1488  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
1489            (SUBREG_TO_REG (i32 0),
1490              (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
1491                        (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
1492              sub_xmm)>;
1493
1494  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
1495            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1496  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
1497            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1498  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
1499            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1500  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
1501            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
1502}
1503
1504let AddedComplexity = 15 in
1505def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
1506                                (ins VR128X:$src),
1507                                "vmovq{z}\t{$src, $dst|$dst, $src}",
1508                                [(set VR128X:$dst, (v2i64 (X86vzmovl 
1509                                                   (v2i64 VR128X:$src))))],
1510                                IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
1511
1512let AddedComplexity = 20 in
1513def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
1514                                 (ins i128mem:$src),
1515                                 "vmovq{z}\t{$src, $dst|$dst, $src}",
1516                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
1517                                                     (loadv2i64 addr:$src))))],
1518                                 IIC_SSE_MOVDQ>, EVEX, VEX_W,
1519                                 EVEX_CD8<8, CD8VT8>;
1520
1521let Predicates = [HasAVX512] in {
1522  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
1523  let AddedComplexity = 20 in {
1524    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
1525              (VMOVDI2PDIZrm addr:$src)>;
1526    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
1527              (VMOV64toPQIZrr GR64:$src)>;
1528    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
1529              (VMOVDI2PDIZrr GR32:$src)>;
1530              
1531    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
1532              (VMOVDI2PDIZrm addr:$src)>;
1533    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
1534              (VMOVDI2PDIZrm addr:$src)>;
1535    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
1536            (VMOVZPQILo2PQIZrm addr:$src)>;
1537    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
1538            (VMOVZPQILo2PQIZrr VR128X:$src)>;
1539  }
1540
1541  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
1542  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
1543                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
1544            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
1545  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
1546                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
1547            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
1548}
1549
1550def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
1551        (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
1552
1553def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
1554        (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
1555
1556def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
1557        (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
1558
1559def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
1560        (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
1561
1562//===----------------------------------------------------------------------===//
1563// AVX-512 - Integer arithmetic
1564//
1565multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
1566                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
1567                        X86MemOperand x86memop, PatFrag scalar_mfrag,
1568                        X86MemOperand x86scalar_mop, string BrdcstStr,
1569                        OpndItins itins, bit IsCommutable = 0> {
1570  let isCommutable = IsCommutable in
1571  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
1572       (ins RC:$src1, RC:$src2),
1573       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1574       [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))], 
1575       itins.rr>, EVEX_4V;
1576  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
1577       (ins RC:$src1, x86memop:$src2),
1578       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1579       [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
1580                                     itins.rm>, EVEX_4V;
1581  def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
1582       (ins RC:$src1, x86scalar_mop:$src2),
1583       !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
1584                  ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
1585       [(set RC:$dst, (OpNode RC:$src1, 
1586                       (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
1587                        itins.rm>, EVEX_4V, EVEX_B;
1588}
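// Each avx512_binop_rm instantiation yields a register form (rr), a 512-bit
// memory form (rm), and a broadcast form (rmb) that folds a scalar load
// replicated to every element.  A sketch of the broadcast syntax the rmb
// form is meant to print/parse (assumed):
//   vpaddd (%rdi){1to16}, %zmm1, %zmm0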
1589multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
1590                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
1591                         PatFrag memop_frag, X86MemOperand x86memop,
1592                         OpndItins itins,
1593                         bit IsCommutable = 0> {
1594  let isCommutable = IsCommutable in
1595  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
1596       (ins RC:$src1, RC:$src2),
1597       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1598       []>, EVEX_4V, VEX_W;
1599  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
1600       (ins RC:$src1, x86memop:$src2),
1601       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1602       []>, EVEX_4V, VEX_W;
1603}
1604
1605defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32,
1606                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
1607                   EVEX_V512, EVEX_CD8<32, CD8VF>;
1608
1609defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32,
1610                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>,
1611                   EVEX_V512, EVEX_CD8<32, CD8VF>;
1612
1613defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32,
1614                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
1615                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
1616
1617defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64,
1618                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>, 
1619                   EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
1620
1621defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64,
1622                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
1623                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1624
1625defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32,
1626                VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8,
1627                EVEX_V512, EVEX_CD8<64, CD8VF>;
1628
1629defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
1630                 VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512,
1631                 EVEX_CD8<64, CD8VF>;
1632
1633def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
1634          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
1635
1636defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
1637                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
1638                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
1639defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
1640                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
1641                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1642
1643defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
1644                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
1645                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
1646defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
1647                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
1648                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1649
1650defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
1651                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
1652                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
1653defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
1654                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
1655                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1656
1657defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
1658                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
1659                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
1660defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
1661                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
1662                   T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1663
1664//===----------------------------------------------------------------------===//
1665// AVX-512 - Unpack Instructions
1666//===----------------------------------------------------------------------===//
1667
1668multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
1669                                   PatFrag mem_frag, RegisterClass RC,
1670                                   X86MemOperand x86memop, string asm,
1671                                   Domain d> {
1672    def rr : AVX512PI<opc, MRMSrcReg,
1673                (outs RC:$dst), (ins RC:$src1, RC:$src2),
1674                asm, [(set RC:$dst,
1675                           (vt (OpNode RC:$src1, RC:$src2)))],
1676                           d>, EVEX_4V;
1677    def rm : AVX512PI<opc, MRMSrcMem,
1678                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
1679                asm, [(set RC:$dst,
1680                       (vt (OpNode RC:$src1,
1681                            (bitconvert (mem_frag addr:$src2)))))],
1682                        d>, EVEX_4V;
1683}
1684
1685defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
1686      VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1687      SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
1688defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
1689      VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1690      SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1691defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
1692      VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1693      SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
1694defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
1695      VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1696      SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1697
1698multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
1699                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
1700                        X86MemOperand x86memop> {
1701  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
1702       (ins RC:$src1, RC:$src2),
1703       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1704       [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))], 
1705       IIC_SSE_UNPCK>, EVEX_4V;
1706  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
1707       (ins RC:$src1, x86memop:$src2),
1708       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1709       [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
1710                                     (bitconvert (memop_frag addr:$src2)))))],
1711                                     IIC_SSE_UNPCK>, EVEX_4V;
1712}
1713defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
1714                                VR512, memopv16i32, i512mem>, EVEX_V512,
1715                                EVEX_CD8<32, CD8VF>;
1716defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
1717                                VR512, memopv8i64, i512mem>, EVEX_V512,
1718                                VEX_W, EVEX_CD8<64, CD8VF>;
1719defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
1720                                VR512, memopv16i32, i512mem>, EVEX_V512,
1721                                EVEX_CD8<32, CD8VF>;
1722defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
1723                                VR512, memopv8i64, i512mem>, EVEX_V512,
1724                                VEX_W, EVEX_CD8<64, CD8VF>;
1725//===----------------------------------------------------------------------===//
1726// AVX-512 - PSHUFD
1727//
1728
1729multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
1730                         SDNode OpNode, PatFrag mem_frag, 
1731                         X86MemOperand x86memop, ValueType OpVT> {
1732  def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
1733                     (ins RC:$src1, i8imm:$src2),
1734                     !strconcat(OpcodeStr,
1735                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1736                     [(set RC:$dst,
1737                       (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
1738                     EVEX;
1739  def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
1740                     (ins x86memop:$src1, i8imm:$src2),
1741                     !strconcat(OpcodeStr,
1742                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1743                     [(set RC:$dst,
1744                       (OpVT (OpNode (mem_frag addr:$src1),
1745                              (i8 imm:$src2))))]>, EVEX;
1746}
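// Sketch of the immediate form produced here (illustrative only):
//   vpshufd $0x1b, %zmm1, %zmm0
// where the 8-bit immediate selects dword positions in the usual PSHUFD
// fashion, applied per 128-bit lane of the 512-bit vector.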
1747
1748defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
1749                      i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>;
1750
1751let ExeDomain = SSEPackedSingle in
1752defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
1753                      memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512,
1754                      EVEX_CD8<32, CD8VF>;
1755let ExeDomain = SSEPackedDouble in
1756defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
1757                      memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512,
1758                      VEX_W, EVEX_CD8<64, CD8VF>;
1759
1760def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
1761          (VPERMILPSZri VR512:$src1, imm:$imm)>;
1762def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
1763          (VPERMILPDZri VR512:$src1, imm:$imm)>;
1764
1765//===----------------------------------------------------------------------===//
1766// AVX-512  Logical Instructions
1767//===----------------------------------------------------------------------===//
1768
1769defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32,
1770                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
1771                      EVEX_V512, EVEX_CD8<32, CD8VF>;
1772defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64,
1773                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
1774                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1775defm VPORDZ  : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32,
1776                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
1777                      EVEX_V512, EVEX_CD8<32, CD8VF>;
1778defm VPORQZ  : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64,
1779                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
1780                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1781defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32,
1782                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
1783                      EVEX_V512, EVEX_CD8<32, CD8VF>;
1784defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64,
1785                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
1786                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1787defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512,
1788                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
1789                      SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
1790defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64,
1791                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>,
1792                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
1793
1794//===----------------------------------------------------------------------===//
1795// AVX-512  FP arithmetic
1796//===----------------------------------------------------------------------===//
1797
1798multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
1799                                  SizeItins itins> {
1800  defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss{z}"), OpNode, FR32X,
1801                             f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
1802                             EVEX_CD8<32, CD8VT1>;
1803  defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd{z}"), OpNode, FR64X,
1804                             f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
1805                             EVEX_CD8<64, CD8VT1>;
1806}
1807
1808let isCommutable = 1 in {
1809defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
1810defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
1811defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
1812defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
1813}
1814let isCommutable = 0 in {
1815defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
1816defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
1817}
1818
1819multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
1820                           RegisterClass RC, ValueType vt,
1821                           X86MemOperand x86memop, PatFrag mem_frag,
1822                           X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
1823                           string BrdcstStr,
1824                           Domain d, OpndItins itins, bit commutable> {
1825  let isCommutable = commutable in
1826    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
1827       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1828       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
1829       EVEX_4V, TB;
1830  let mayLoad = 1 in {
1831    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
1832       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1833       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
1834          itins.rm, d>, EVEX_4V, TB;
1835    def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
1836       (ins RC:$src1, x86scalar_mop:$src2),
1837       !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
1838                  ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
1839       [(set RC:$dst, (OpNode RC:$src1, 
1840                       (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
1841       itins.rm, d>, EVEX_4V, EVEX_B, TB;
1842    }
1843}
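// As with the integer binops, each avx512_fp_packed instantiation gives
// rr/rm/rmb forms; the rmb form folds a broadcast scalar operand, e.g.
// (sketch, not from a test):
//   vaddpd (%rax){1to8}, %zmm1, %zmm0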
1844
1845defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem,
1846                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, 
1847                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
1848                   
1849defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem,
1850                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
1851                   SSE_ALU_ITINS_P.d, 1>,
1852                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
1853
1854defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem,
1855                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
1856                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
1857defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem,
1858                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
1859                   SSE_ALU_ITINS_P.d, 1>,
1860                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
1861
1862defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem,
1863                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
1864                   SSE_ALU_ITINS_P.s, 1>,
1865                   EVEX_V512, EVEX_CD8<32, CD8VF>;
1866defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem,
1867                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
1868                   SSE_ALU_ITINS_P.s, 1>,
1869                   EVEX_V512, EVEX_CD8<32, CD8VF>;
1870
1871defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem,
1872                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
1873                   SSE_ALU_ITINS_P.d, 1>,
1874                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
1875defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem,
1876                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
1877                   SSE_ALU_ITINS_P.d, 1>,
1878                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
1879
1880defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem,
1881                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
1882                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
1883defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem,
1884                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
1885                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
1886
1887defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem, 
1888                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
1889                   SSE_ALU_ITINS_P.d, 0>, 
1890                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
1891defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, 
1892                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
1893                   SSE_ALU_ITINS_P.d, 0>, 
1894                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
1895
1896//===----------------------------------------------------------------------===//
1897// AVX-512  VPTESTM instructions
1898//===----------------------------------------------------------------------===//
1899
1900multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC, 
1901              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, 
1902              SDNode OpNode, ValueType vt> {
1903  def rr : AVX5128I<opc, MRMSrcReg,
1904             (outs KRC:$dst), (ins RC:$src1, RC:$src2), 
1905             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1906             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
1907  def rm : AVX5128I<opc, MRMSrcMem,
1908             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2), 
1909             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1910             [(set KRC:$dst, (OpNode (vt RC:$src1), 
1911              (bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
1912}
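// Sketch of use (assumed semantics): vptestm{d,q} AND the two vector operands
// element-wise and set the corresponding mask bit when the result is nonzero,
// which is what the X86testm node above is selected into, e.g.
//   vptestmd %zmm1, %zmm0, %k1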
1913
1914defm VPTESTMDZ  : avx512_vptest<0x27, "vptestmd", VK16, VR512,  f512mem,
1915                              memopv16i32, X86testm, v16i32>, EVEX_V512,
1916                              EVEX_CD8<32, CD8VF>;
1917defm VPTESTMQZ  : avx512_vptest<0x27, "vptestmq", VK8, VR512,  f512mem,
1918                              memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
1919                              EVEX_CD8<64, CD8VF>;
1920
1921//===----------------------------------------------------------------------===//
1922// AVX-512  Shift instructions
1923//===----------------------------------------------------------------------===//
1924multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
1925                         string OpcodeStr, SDNode OpNode, RegisterClass RC,
1926                         ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
1927                         RegisterClass KRC> {
1928  def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
1929       (ins RC:$src1, i8imm:$src2),
1930           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1931       [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
1932        SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
1933  def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
1934       (ins KRC:$mask, RC:$src1, i8imm:$src2),
1935           !strconcat(OpcodeStr,
1936                "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
1937       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
1938  def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
1939       (ins x86memop:$src1, i8imm:$src2),
1940           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1941       [(set RC:$dst, (OpNode (mem_frag addr:$src1),
1942                     (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
1943  def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
1944       (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
1945           !strconcat(OpcodeStr,
1946                "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
1947       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
1948}
1949
1950multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
1951                          RegisterClass RC, ValueType vt, ValueType SrcVT,
1952                          PatFrag bc_frag, RegisterClass KRC> {
1953  // src2 is always 128-bit
1954  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
1955       (ins RC:$src1, VR128X:$src2),
1956           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1957       [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
1958        SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
1959  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
1960       (ins KRC:$mask, RC:$src1, VR128X:$src2),
1961           !strconcat(OpcodeStr,
1962                "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
1963       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
1964  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
1965       (ins RC:$src1, i128mem:$src2),
1966           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1967       [(set RC:$dst, (vt (OpNode RC:$src1,
1968                       (bc_frag (memopv2i64 addr:$src2)))))],
1969                        SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
1970  def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
1971       (ins KRC:$mask, RC:$src1, i128mem:$src2),
1972           !strconcat(OpcodeStr,
1973                "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
1974       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
1975}
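// The rrm forms shift every element of the 512-bit source by a single count
// taken from the low quadword of an XMM register (hence "src2 is always
// 128-bit" above), e.g. roughly:
//   vpsrld %xmm2, %zmm1, %zmm0
// while the rmi forms take an 8-bit immediate count instead.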
1976
1977defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
1978                           VR512, v16i32, i512mem, memopv16i32, VK16WM>,
1979                           EVEX_V512, EVEX_CD8<32, CD8VF>;
1980defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
1981                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
1982                           EVEX_CD8<32, CD8VQ>;
1983                           
1984defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
1985                           VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
1986                           EVEX_CD8<64, CD8VF>, VEX_W;
1987defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
1988                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
1989                           EVEX_CD8<64, CD8VQ>, VEX_W;
1990
1991defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
1992                           VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
1993                           EVEX_CD8<32, CD8VF>;
1994defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
1995                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
1996                           EVEX_CD8<32, CD8VQ>;
1997                           
1998defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
1999                           VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2000                           EVEX_CD8<64, CD8VF>, VEX_W;
2001defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
2002                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2003                           EVEX_CD8<64, CD8VQ>, VEX_W;
2004
2005defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
2006                           VR512, v16i32, i512mem, memopv16i32, VK16WM>,
2007                           EVEX_V512, EVEX_CD8<32, CD8VF>;
2008defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
2009                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
2010                           EVEX_CD8<32, CD8VQ>;
2011                           
2012defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
2013                           VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2014                           EVEX_CD8<64, CD8VF>, VEX_W;
2015defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
2016                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2017                           EVEX_CD8<64, CD8VQ>, VEX_W;
2018
2019//===-------------------------------------------------------------------===//
2020// Variable Bit Shifts
2021//===-------------------------------------------------------------------===//
2022multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
2023                           RegisterClass RC, ValueType vt,
2024                           X86MemOperand x86memop, PatFrag mem_frag> {
2025  def rr  : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
2026             (ins RC:$src1, RC:$src2),
2027             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2028             [(set RC:$dst,
2029               (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
2030             EVEX_4V;
2031  def rm  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
2032             (ins RC:$src1, x86memop:$src2),
2033             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2034             [(set RC:$dst,
2035               (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
2036             EVEX_4V;
2037}
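// Variable shifts take a whole vector of per-element shift counts, which is
// why they match plain shl/srl/sra with vector operands; an assumed example
// of the printed form:
//   vpsllvd %zmm2, %zmm1, %zmm0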
2038
2039defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32, 
2040                               i512mem, memopv16i32>, EVEX_V512,
2041                               EVEX_CD8<32, CD8VF>;
2042defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64, 
2043                               i512mem, memopv8i64>, EVEX_V512, VEX_W,
2044                               EVEX_CD8<64, CD8VF>;
2045defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32, 
2046                               i512mem, memopv16i32>, EVEX_V512,
2047                               EVEX_CD8<32, CD8VF>;
2048defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64, 
2049                               i512mem, memopv8i64>, EVEX_V512, VEX_W,
2050                               EVEX_CD8<64, CD8VF>;
2051defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, 
2052                               i512mem, memopv16i32>, EVEX_V512,
2053                               EVEX_CD8<32, CD8VF>;
2054defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, 
2055                               i512mem, memopv8i64>, EVEX_V512, VEX_W,
2056                               EVEX_CD8<64, CD8VF>;
2057
2058//===----------------------------------------------------------------------===//
2059// AVX-512 - MOVDDUP
2060//===----------------------------------------------------------------------===//
2061
2062multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT, 
2063                        X86MemOperand x86memop, PatFrag memop_frag> {
2064def rr  : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
2065                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2066                    [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
2067def rm  : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2068                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2069                    [(set RC:$dst,
2070                      (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
2071}
2072
2073defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
2074                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
2075def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
2076          (VMOVDDUPZrm addr:$src)>;
2077
2078//===---------------------------------------------------------------------===//
2079// Replicate Single FP - MOVSHDUP and MOVSLDUP
2080//===---------------------------------------------------------------------===//
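// avx512_replicate_sfp - Replicate single-precision elements, parameterized by
// the shuffle node (X86Movshdup or X86Movsldup) and the memory fragment used
// by the load-folding form.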
2081multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
2082                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
2083                              X86MemOperand x86memop> {
2084  def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
2085                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2086                      [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
2087  let mayLoad = 1 in
2088  def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2089                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2090                      [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
2091}
2092
2093defm VMOVSHDUPZ  : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
2094                       v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
2095                       EVEX_CD8<32, CD8VF>;
2096defm VMOVSLDUPZ  : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
2097                       v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
2098                       EVEX_CD8<32, CD8VF>;
2099
2100def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
2101def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
2102           (VMOVSHDUPZrm addr:$src)>;
2103def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
2104def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
2105           (VMOVSLDUPZrm addr:$src)>;
2106
2107//===----------------------------------------------------------------------===//
2108// Move Low to High and High to Low packed FP Instructions
2109//===----------------------------------------------------------------------===//
2110def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
2111          (ins VR128X:$src1, VR128X:$src2),
2112          "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2113          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
2114           IIC_SSE_MOV_LH>, EVEX_4V;
2115def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
2116          (ins VR128X:$src1, VR128X:$src2),
2117          "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2118          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
2119          IIC_SSE_MOV_LH>, EVEX_4V;
2120
2121let Predicates = [HasAVX512] in {
2122  // MOVLHPS patterns
2123  def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
2124            (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
2125  def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
2126            (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
2127
2128  // MOVHLPS patterns
2129  def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
2130            (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
2131}
2132
2133//===----------------------------------------------------------------------===//
2134// FMA - Fused Multiply Operations
2135//
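// avx512_fma3p_rm - Packed FMA, used below for the 213 forms.  Each
// instantiation provides a register form (r), a full-vector memory form (m)
// and a broadcast-from-scalar memory form (mb, EVEX_B).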
2136let Constraints = "$src1 = $dst" in {
2137multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
2138            RegisterClass RC, X86MemOperand x86memop,
2139            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2140            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
2141  def r: AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
2142          (ins RC:$src1, RC:$src2, RC:$src3),
2143          !strconcat(OpcodeStr,"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
2144          [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>;
2145
2146  let mayLoad = 1 in
2147  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
2148          (ins RC:$src1, RC:$src2, x86memop:$src3),
2149          !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
2150          [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
2151                                               (mem_frag addr:$src3))))]>;
2152   def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
2153           (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
2154           !strconcat(OpcodeStr, "\t{${src3}", BrdcstStr, 
2155            ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
2156           [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
2157           (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
2158}
2159} // Constraints = "$src1 = $dst"
2160
2161let ExeDomain = SSEPackedSingle in {
2162  defm VFMADD213PSZ    : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
2163                                    memopv16f32, f32mem, loadf32, "{1to16}",
2164                                    X86Fmadd, v16f32>, EVEX_V512,
2165                                    EVEX_CD8<32, CD8VF>;
2166  defm VFMSUB213PSZ    : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
2167                                    memopv16f32, f32mem, loadf32, "{1to16}",
2168                                    X86Fmsub, v16f32>, EVEX_V512,
2169                                    EVEX_CD8<32, CD8VF>;
2170  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
2171                                    memopv16f32, f32mem, loadf32, "{1to16}",
2172                                    X86Fmaddsub, v16f32>,
2173                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
2174  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
2175                                    memopv16f32, f32mem, loadf32, "{1to16}",
2176                                    X86Fmsubadd, v16f32>,
2177                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
2178  defm VFNMADD213PSZ   : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
2179                                    memopv16f32, f32mem, loadf32, "{1to16}",
2180                                    X86Fnmadd, v16f32>, EVEX_V512,
2181                                    EVEX_CD8<32, CD8VF>;
2182  defm VFNMSUB213PSZ   : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
2183                                    memopv16f32, f32mem, loadf32, "{1to16}",
2184                                    X86Fnmsub, v16f32>, EVEX_V512,
2185                                    EVEX_CD8<32, CD8VF>;
2186}
2187let ExeDomain = SSEPackedDouble in {
2188  defm VFMADD213PDZ    : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
2189                                    memopv8f64, f64mem, loadf64, "{1to8}",
2190                                    X86Fmadd, v8f64>, EVEX_V512,
2191                                    VEX_W, EVEX_CD8<64, CD8VF>;
2192  defm VFMSUB213PDZ    : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
2193                                    memopv8f64, f64mem, loadf64, "{1to8}",
2194                                    X86Fmsub, v8f64>, EVEX_V512, VEX_W,
2195                                    EVEX_CD8<64, CD8VF>;
2196  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
2197                                    memopv8f64, f64mem, loadf64, "{1to8}",
2198                                    X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
2199                                    EVEX_CD8<64, CD8VF>;
2200  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
2201                                    memopv8f64, f64mem, loadf64, "{1to8}",
2202                                    X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
2203                                    EVEX_CD8<64, CD8VF>;
2204  defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
2205                                  memopv8f64, f64mem, loadf64, "{1to8}",
2206                                  X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
2207                                  EVEX_CD8<64, CD8VF>;
2208  defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
2209                                  memopv8f64, f64mem, loadf64, "{1to8}",
2210                                  X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
2211                                  EVEX_CD8<64, CD8VF>;
2212}
2213
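// avx512_fma3p_m132 - Load-folding forms of the 132 FMA variants: only the
// full-vector memory form (m) and the broadcast form (mb) are defined here.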
2214let Constraints = "$src1 = $dst" in {
2215multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
2216            RegisterClass RC, X86MemOperand x86memop,
2217            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2218            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
2219  let mayLoad = 1 in
2220  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
2221          (ins RC:$src1, RC:$src3, x86memop:$src2),
2222          !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
2223          [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
2224   def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
2225           (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
2226           !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, 
2227            ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
2228           [(set RC:$dst, (OpNode RC:$src1, 
2229           (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
2230}
2231} // Constraints = "$src1 = $dst"
2232
2233
2234let ExeDomain = SSEPackedSingle in {
2235  defm VFMADD132PSZ    : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
2236                                    memopv16f32, f32mem, loadf32, "{1to16}",
2237                                    X86Fmadd, v16f32>, EVEX_V512,
2238                                    EVEX_CD8<32, CD8VF>;
2239  defm VFMSUB132PSZ    : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
2240                                    memopv16f32, f32mem, loadf32, "{1to16}",
2241                                    X86Fmsub, v16f32>, EVEX_V512,
2242                                    EVEX_CD8<32, CD8VF>;
2243  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
2244                                    memopv16f32, f32mem, loadf32, "{1to16}",
2245                                    X86Fmaddsub, v16f32>,
2246                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
2247  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
2248                                    memopv16f32, f32mem, loadf32, "{1to16}",
2249                                    X86Fmsubadd, v16f32>,
2250                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
2251  defm VFNMADD132PSZ   : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
2252                                    memopv16f32, f32mem, loadf32, "{1to16}",
2253                                    X86Fnmadd, v16f32>, EVEX_V512,
2254                                    EVEX_CD8<32, CD8VF>;
2255  defm VFNMSUB132PSZ   : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
2256                                    memopv16f32, f32mem, loadf32, "{1to16}",
2257                                    X86Fnmsub, v16f32>, EVEX_V512,
2258                                    EVEX_CD8<32, CD8VF>;
2259}
2260let ExeDomain = SSEPackedDouble in {
2261  defm VFMADD132PDZ    : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
2262                                    memopv8f64, f64mem, loadf64, "{1to8}",
2263                                    X86Fmadd, v8f64>, EVEX_V512,
2264                                    VEX_W, EVEX_CD8<64, CD8VF>;
2265  defm VFMSUB132PDZ    : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
2266                                    memopv8f64, f64mem, loadf64, "{1to8}",
2267                                    X86Fmsub, v8f64>, EVEX_V512, VEX_W,
2268                                    EVEX_CD8<64, CD8VF>;
2269  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
2270                                    memopv8f64, f64mem, loadf64, "{1to8}",
2271                                    X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
2272                                    EVEX_CD8<64, CD8VF>;
2273  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
2274                                    memopv8f64, f64mem, loadf64, "{1to8}",
2275                                    X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
2276                                    EVEX_CD8<64, CD8VF>;
2277  defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
2278                                  memopv8f64, f64mem, loadf64, "{1to8}",
2279                                  X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
2280                                  EVEX_CD8<64, CD8VF>;
2281  defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
2282                                  memopv8f64, f64mem, loadf64, "{1to8}",
2283                                  X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
2284                                  EVEX_CD8<64, CD8VF>;
2285}
2286
2287// Scalar FMA
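// avx512_fma3s_rm - Scalar (ss/sd) 213-form FMA on FR32X/FR64X with a register
// form (r) and a load-folding form (m).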
2288let Constraints = "$src1 = $dst" in {
2289multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 
2290                 RegisterClass RC, ValueType OpVT, 
2291                 X86MemOperand x86memop, Operand memop, 
2292                 PatFrag mem_frag> {
2293  let isCommutable = 1 in
2294  def r     : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
2295                   (ins RC:$src1, RC:$src2, RC:$src3),
2296                   !strconcat(OpcodeStr,
2297                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
2298                   [(set RC:$dst,
2299                     (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
2300  let mayLoad = 1 in
2301  def m     : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
2302                   (ins RC:$src1, RC:$src2, x86memop:$src3),
2303                   !strconcat(OpcodeStr,
2304                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
2305                   [(set RC:$dst,
2306                     (OpVT (OpNode RC:$src2, RC:$src1,
2307                            (mem_frag addr:$src3))))]>;
2308}
2309
2310} // Constraints = "$src1 = $dst"
2311
2312defm VFMADDSSZ  : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X, 
2313                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
2314defm VFMADDSDZ  : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X,
2315                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
2316defm VFMSUBSSZ  : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X, 
2317                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
2318defm VFMSUBSDZ  : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X,
2319                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
2320defm VFNMADDSSZ  : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X, 
2321                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
2322defm VFNMADDSDZ  : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X,
2323                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
2324defm VFNMSUBSSZ  : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X, 
2325                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
2326defm VFNMSUBSDZ  : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X,
2327                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
2328
2329//===----------------------------------------------------------------------===//
2330// AVX-512  Scalar convert from signed integer to float/double
2331//===----------------------------------------------------------------------===//
2332
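// avx512_vcvtsi - VCVT(U)SI2SS/SD register and memory forms.  The instructions
// carry no patterns themselves; the Pat<> definitions below select them for
// sint_to_fp/uint_to_fp with an IMPLICIT_DEF pass-through first operand.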
2333multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
2334                          X86MemOperand x86memop, string asm> {
2335let neverHasSideEffects = 1 in {
2336  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
2337              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
2338              EVEX_4V;
2339  let mayLoad = 1 in
2340  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
2341              (ins DstRC:$src1, x86memop:$src),
2342              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
2343              EVEX_4V;
2344} // neverHasSideEffects = 1
2345}
2346let Predicates = [HasAVX512] in {
2347defm VCVTSI2SSZ   : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
2348                                  XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2349defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
2350                                  XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
2351defm VCVTSI2SDZ   : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
2352                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2353defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
2354                                  XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
2355
2356def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
2357          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
2358def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
2359          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
2360def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
2361          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
2362def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
2363          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
2364
2365def : Pat<(f32 (sint_to_fp GR32:$src)),
2366          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
2367def : Pat<(f32 (sint_to_fp GR64:$src)),
2368          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
2369def : Pat<(f64 (sint_to_fp GR32:$src)),
2370          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
2371def : Pat<(f64 (sint_to_fp GR64:$src)),
2372          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
2373
2374defm VCVTUSI2SSZ   : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">,
2375                                  XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2376defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">,
2377                                  XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
2378defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">,
2379                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2380defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">,
2381                                  XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
2382
2383def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
2384          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
2385def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
2386          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
2387def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
2388          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
2389def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
2390          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
2391
2392def : Pat<(f32 (uint_to_fp GR32:$src)),
2393          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
2394def : Pat<(f32 (uint_to_fp GR64:$src)),
2395          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
2396def : Pat<(f64 (uint_to_fp GR32:$src)),
2397          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
2398def : Pat<(f64 (uint_to_fp GR64:$src)),
2399          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
2400}
2401
2402//===----------------------------------------------------------------------===//
2403// AVX-512  Scalar convert from float/double to integer
2404//===----------------------------------------------------------------------===//
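// avx512_cvt_s_int - Intrinsic-based scalar fp-to-int conversions on VR128X
// sources; the memory form carries no pattern.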
2405multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
2406                          Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
2407                          string asm> {
2408let neverHasSideEffects = 1 in {
2409  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
2410              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
2411              [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
2412  let mayLoad = 1 in
2413  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
2414              !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
2415} // neverHasSideEffects = 1
2416}
2417let Predicates = [HasAVX512] in {
2418// Convert float/double to signed/unsigned int 32/64
2419defm VCVTSS2SIZ:    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
2420                                   ssmem, sse_load_f32, "cvtss2si{z}">,
2421                                   XS, EVEX_CD8<32, CD8VT1>;
2422defm VCVTSS2SI64Z:  avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
2423                                   ssmem, sse_load_f32, "cvtss2si{z}">,
2424                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
2425defm VCVTSS2USIZ:   avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
2426                                   ssmem, sse_load_f32, "cvtss2usi{z}">,
2427                                   XS, EVEX_CD8<32, CD8VT1>;
2428defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
2429                                   int_x86_avx512_cvtss2usi64, ssmem,
2430                                   sse_load_f32, "cvtss2usi{z}">, XS, VEX_W,
2431                                   EVEX_CD8<32, CD8VT1>;
2432defm VCVTSD2SIZ:    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
2433                                   sdmem, sse_load_f64, "cvtsd2si{z}">,
2434                                   XD, EVEX_CD8<64, CD8VT1>;
2435defm VCVTSD2SI64Z:  avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
2436                                   sdmem, sse_load_f64, "cvtsd2si{z}">,
2437                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
2438defm VCVTSD2USIZ:   avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
2439                                   sdmem, sse_load_f64, "cvtsd2usi{z}">,
2440                                   XD, EVEX_CD8<64, CD8VT1>;
2441defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
2442                                   int_x86_avx512_cvtsd2usi64, sdmem,
2443                                   sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W,
2444                                   EVEX_CD8<64, CD8VT1>;
2445
2446defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
2447          int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}{z}",
2448          SSE_CVT_Scalar, 0>, XS, EVEX_4V;
2449defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
2450          int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}",
2451          SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
2452defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
2453          int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}",
2454          SSE_CVT_Scalar, 0>, XD, EVEX_4V;
2455defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
2456          int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}",
2457          SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
2458
2459defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
2460          int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}",
2461          SSE_CVT_Scalar, 0>, XS, EVEX_4V;
2462defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
2463          int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}",
2464          SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
2465defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
2466          int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}",
2467          SSE_CVT_Scalar, 0>, XD, EVEX_4V;
2468defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
2469          int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}",
2470          SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
2471
2472// Convert float/double to signed/unsigned int 32/64 with truncation
2473defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
2474                                   ssmem, sse_load_f32, "cvttss2si{z}">,
2475                                   XS, EVEX_CD8<32, CD8VT1>;
2476defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
2477                                   int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
2478                                   "cvttss2si{z}">, XS, VEX_W,
2479                                   EVEX_CD8<32, CD8VT1>;
2480defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
2481                                   sdmem, sse_load_f64, "cvttsd2si{z}">, XD,
2482                                   EVEX_CD8<64, CD8VT1>;
2483defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
2484                                   int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
2485                                   "cvttsd2si{z}">, XD, VEX_W,
2486                                   EVEX_CD8<64, CD8VT1>;
2487defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
2488                                   int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
2489                                   "cvttss2usi{z}">, XS, EVEX_CD8<32, CD8VT1>;
2490defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
2491                                   int_x86_avx512_cvttss2usi64, ssmem,
2492                                   sse_load_f32, "cvttss2usi{z}">, XS, VEX_W,
2493                                   EVEX_CD8<32, CD8VT1>;
2494defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
2495                                   int_x86_avx512_cvttsd2usi,
2496                                   sdmem, sse_load_f64, "cvttsd2usi{z}">, XD,
2497                                   EVEX_CD8<64, CD8VT1>;
2498defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
2499                                   int_x86_avx512_cvttsd2usi64, sdmem,
2500                                   sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W,
2501                                   EVEX_CD8<64, CD8VT1>;
2502}
2503
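// avx512_cvt_s - Node-based (fp_to_sint/fp_to_uint) scalar conversions from
// FR32X/FR64X with register and load-folding forms.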
2504multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
2505                         SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
2506                         string asm> {
2507  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
2508              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
2509              [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
2510  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
2511              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
2512              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
2513}
2514
2515defm VCVTTSS2SIZ    : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
2516                                  loadf32, "cvttss2si{z}">, XS,
2517                                  EVEX_CD8<32, CD8VT1>;
2518defm VCVTTSS2USIZ   : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
2519                                  loadf32, "cvttss2usi{z}">, XS,
2520                                  EVEX_CD8<32, CD8VT1>;
2521defm VCVTTSS2SI64Z  : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
2522                                  loadf32, "cvttss2si{z}">, XS, VEX_W,
2523                                  EVEX_CD8<32, CD8VT1>;
2524defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
2525                                  loadf32, "cvttss2usi{z}">, XS, VEX_W,
2526                                  EVEX_CD8<32, CD8VT1>;
2527defm VCVTTSD2SIZ    : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
2528                                  loadf64, "cvttsd2si{z}">, XD,
2529                                  EVEX_CD8<64, CD8VT1>;
2530defm VCVTTSD2USIZ   : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
2531                                  loadf64, "cvttsd2usi{z}">, XD,
2532                                  EVEX_CD8<64, CD8VT1>;
2533defm VCVTTSD2SI64Z  : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
2534                                  loadf64, "cvttsd2si{z}">, XD, VEX_W,
2535                                  EVEX_CD8<64, CD8VT1>;
2536defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
2537                                  loadf64, "cvttsd2usi{z}">, XD, VEX_W,
2538                                  EVEX_CD8<64, CD8VT1>;
2539//===----------------------------------------------------------------------===//
2540// AVX-512  Convert from float to double and back
2541//===----------------------------------------------------------------------===//
2542let neverHasSideEffects = 1 in {
2543def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
2544                    (ins FR32X:$src1, FR32X:$src2),
2545                    "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2546                    []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
2547let mayLoad = 1 in
2548def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
2549                    (ins FR32X:$src1, f32mem:$src2),
2550                    "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2551                    []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
2552                    EVEX_CD8<32, CD8VT1>;
2553
2554// Convert scalar double to scalar single
2555def VCVTSD2SSZrr  : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
2556                      (ins FR64X:$src1, FR64X:$src2),
2557                      "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2558                      []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
2559let mayLoad = 1 in
2560def VCVTSD2SSZrm  : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
2561                      (ins FR64X:$src1, f64mem:$src2),
2562                      "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2563                      []>, EVEX_4V, VEX_LIG, VEX_W,
2564                      Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
2565}
2566
2567def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
2568      Requires<[HasAVX512]>;
2569def : Pat<(fextend (loadf32 addr:$src)),
2570    (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
2571
2572def : Pat<(extloadf32 addr:$src),
2573    (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
2574      Requires<[HasAVX512, OptForSize]>;
2575
2576def : Pat<(extloadf32 addr:$src),
2577    (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
2578    Requires<[HasAVX512, OptForSpeed]>;
2579
2580def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
2581           Requires<[HasAVX512]>;
2582
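// avx512_vcvt_fp - Packed conversions between float/double/integer vectors,
// parameterized by the ISD node, the source and destination register classes
// and the memory fragment used by the load-folding form.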
2583multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC, 
2584               RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, 
2585               X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
2586               Domain d> {
2587let neverHasSideEffects = 1 in {
2588  def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
2589              !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 
2590              [(set DstRC:$dst,
2591                (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
2592  let mayLoad = 1 in
2593  def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
2594              !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 
2595              [(set DstRC:$dst,
2596                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
2597} // neverHasSideEffects = 1
2598}
2599
2600defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
2601                                memopv8f64, f512mem, v8f32, v8f64,
2602                                SSEPackedSingle>, EVEX_V512, VEX_W, OpSize,
2603                                EVEX_CD8<64, CD8VF>;
2604
2605defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
2606                                memopv4f64, f256mem, v8f64, v8f32,
2607                                SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>;
2608def : Pat<(v8f64 (extloadv8f32 addr:$src)),
2609            (VCVTPS2PDZrm addr:$src)>;
2610
2611//===----------------------------------------------------------------------===//
2612// AVX-512  Vector convert from signed integer to float/double
2613//===----------------------------------------------------------------------===//
2614
2615defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
2616                                memopv8i64, i512mem, v16f32, v16i32,
2617                                SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2618
2619defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
2620                                memopv4i64, i256mem, v8f64, v8i32,
2621                                SSEPackedDouble>, EVEX_V512, XS,
2622                                EVEX_CD8<32, CD8VH>;
2623
2624defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
2625                                 memopv16f32, f512mem, v16i32, v16f32,
2626                                 SSEPackedSingle>, EVEX_V512, XS,
2627                                 EVEX_CD8<32, CD8VF>;
2628
2629defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
2630                                 memopv8f64, f512mem, v8i32, v8f64, 
2631                                 SSEPackedDouble>, EVEX_V512, OpSize, VEX_W,
2632                                 EVEX_CD8<64, CD8VF>;
2633
2634defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
2635                                 memopv16f32, f512mem, v16i32, v16f32,
2636                                 SSEPackedSingle>, EVEX_V512, 
2637                                 EVEX_CD8<32, CD8VF>;
2638
2639defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
2640                                 memopv8f64, f512mem, v8i32, v8f64,
2641                                 SSEPackedDouble>, EVEX_V512, VEX_W,
2642                                 EVEX_CD8<64, CD8VF>;
2643                                 
2644defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
2645                                 memopv4i64, f256mem, v8f64, v8i32,
2646                                 SSEPackedDouble>, EVEX_V512, XS,
2647                                 EVEX_CD8<32, CD8VH>;
2648                                 
2649defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
2650                                 memopv16i32, f512mem, v16f32, v16i32,
2651                                 SSEPackedSingle>, EVEX_V512, XD,
2652                                 EVEX_CD8<32, CD8VF>;
2653
2654def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
2655          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr 
2656           (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
2657                                 
2658
2659def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src),
2660          (VCVTDQ2PSZrr VR512:$src)>;
2661def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))),
2662          (VCVTDQ2PSZrm addr:$src)>;
2663
2664def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2665                        "vcvtps2dq\t{$src, $dst|$dst, $src}",
2666                        [(set VR512:$dst,
2667                          (int_x86_avx512_cvt_ps2dq_512 VR512:$src))],
2668                        IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512;
2669def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2670                        "vcvtps2dq\t{$src, $dst|$dst, $src}",
2671                        [(set VR512:$dst,
2672                          (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))],
2673                        IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
2674
2675
2676let Predicates = [HasAVX512] in {
2677  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
2678            (VCVTPD2PSZrm addr:$src)>;
2679  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
2680            (VCVTPS2PDZrm addr:$src)>;
2681}
2682
2683//===----------------------------------------------------------------------===//
2684// Half precision conversion instructions
2685//===----------------------------------------------------------------------===//
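// Half <-> single conversions on 512-bit vectors via the AVX-512 intrinsics;
// the memory forms carry no patterns.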
2686multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
2687                             X86MemOperand x86memop, Intrinsic Int> {
2688  def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
2689             "vcvtph2ps\t{$src, $dst|$dst, $src}",
2690             [(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
2691  let neverHasSideEffects = 1, mayLoad = 1 in
2692  def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
2693             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
2694}
2695
2696multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
2697                             X86MemOperand x86memop, Intrinsic Int> {
2698  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
2699               (ins srcRC:$src1, i32i8imm:$src2),
2700               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701               [(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
2702  let neverHasSideEffects = 1, mayStore = 1 in
2703  def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
2704               (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
2705               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
2706}
2707
2708defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
2709                                    int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
2710                                    EVEX_CD8<32, CD8VH>;
2711defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
2712                                    int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
2713                                    EVEX_CD8<32, CD8VH>;
2714
2715let Defs = [EFLAGS], Predicates = [HasAVX512] in {
2716  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
2717                                 "ucomiss{z}">, TB, EVEX, VEX_LIG,
2718                                 EVEX_CD8<32, CD8VT1>;
2719  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
2720                                  "ucomisd{z}">, TB, OpSize, EVEX,
2721                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2722  let Pattern = []<dag> in {
2723    defm VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
2724                                   "comiss{z}">, TB, EVEX, VEX_LIG,
2725                                   EVEX_CD8<32, CD8VT1>;
2726    defm VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
2727                                   "comisd{z}">, TB, OpSize, EVEX,
2728                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2729  }
2730  defm Int_VUCOMISSZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
2731                            load, "ucomiss">, TB, EVEX, VEX_LIG,
2732                            EVEX_CD8<32, CD8VT1>;
2733  defm Int_VUCOMISDZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
2734                            load, "ucomisd">, TB, OpSize, EVEX,
2735                            VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2736
2737  defm Int_VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
2738                            load, "comiss">, TB, EVEX, VEX_LIG,
2739                            EVEX_CD8<32, CD8VT1>;
2740  defm Int_VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
2741                            load, "comisd">, TB, OpSize, EVEX,
2742                            VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2743}
2744  
2745/// avx512_fp_unop_p - AVX-512 unops in packed form.
2746multiclass avx512_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
2747  def PSZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2748                        !strconcat(OpcodeStr,
2749                                   "ps\t{$src, $dst|$dst, $src}"),
2750                        [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))]>,
2751                        EVEX, EVEX_V512;
2752  def PSZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2753                        !strconcat(OpcodeStr,
2754                                   "ps\t{$src, $dst|$dst, $src}"),
2755                        [(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
2756                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
2757  def PDZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2758                        !strconcat(OpcodeStr,
2759                                   "pd\t{$src, $dst|$dst, $src}"),
2760                        [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))]>,
2761                        EVEX, EVEX_V512, VEX_W;
2762  def PDZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2763                        !strconcat(OpcodeStr,
2764                                   "pd\t{$src, $dst|$dst, $src}"),
2765                        [(set VR512:$dst, (OpNode (memopv8f64 addr:$src)))]>,
2766                        EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2767}
2768
2769/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms.
2770multiclass avx512_fp_unop_p_int<bits<8> opc, string OpcodeStr,
2771                              Intrinsic V16F32Int, Intrinsic V8F64Int> {
2772  def PSZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2773                           !strconcat(OpcodeStr,
2774                                      "ps\t{$src, $dst|$dst, $src}"),
2775                           [(set VR512:$dst, (V16F32Int VR512:$src))]>, 
2776                           EVEX, EVEX_V512;
2777  def PSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2778                         !strconcat(OpcodeStr,
2779                         "ps\t{$src, $dst|$dst, $src}"),
2780                         [(set VR512:$dst, 
2781                           (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
2782                         EVEX_V512, EVEX_CD8<32, CD8VF>;
2783  def PDZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2784                           !strconcat(OpcodeStr,
2785                                      "pd\t{$src, $dst|$dst, $src}"),
2786                           [(set VR512:$dst, (V8F64Int VR512:$src))]>, 
2787                           EVEX, EVEX_V512, VEX_W;
2788  def PDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2789                         !strconcat(OpcodeStr,
2790                         "pd\t{$src, $dst|$dst, $src}"),
2791                         [(set VR512:$dst, 
2792                           (V8F64Int (memopv8f64 addr:$src)))]>,
2793                            EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2794}
2795
2796/// avx512_fp_unop_s - AVX-512 unops in scalar form.
2797multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr> {
2798  let hasSideEffects = 0 in {
2799  def SSZr : AVX5128I<opc, MRMSrcReg, (outs FR32X:$dst),
2800               (ins FR32X:$src1, FR32X:$src2),
2801               !strconcat(OpcodeStr,
2802                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2803                      []>, EVEX_4V;
2804  let mayLoad = 1 in {
2805  def SSZm : AVX5128I<opc, MRMSrcMem, (outs FR32X:$dst),
2806               (ins FR32X:$src1, f32mem:$src2),
2807               !strconcat(OpcodeStr,
2808                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2809                      []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
2810  def SSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
2811                   (ins VR128X:$src1, ssmem:$src2),
2812                   !strconcat(OpcodeStr,
2813                              "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2814                   []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
2815  }
2816  def SDZr : AVX5128I<opc, MRMSrcReg, (outs FR64X:$dst),
2817               (ins FR64X:$src1, FR64X:$src2),
2818               !strconcat(OpcodeStr,
2819                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, 
2820                      EVEX_4V, VEX_W;
2821  let mayLoad = 1 in {
2822  def SDZm : AVX5128I<opc, MRMSrcMem, (outs FR64X:$dst),
2823               (ins FR64X:$src1, f64mem:$src2),
2824               !strconcat(OpcodeStr,
2825                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, 
2826               EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
2827  def SDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
2828                  (ins VR128X:$src1, sdmem:$src2),
2829                   !strconcat(OpcodeStr,
2830                              "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2831                  []>, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
2832  }
2833}
2834}
2835
2836defm VRCP14   : avx512_fp_unop_s<0x4D, "vrcp14">,
2837                avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>,
2838                avx512_fp_unop_p_int<0x4C, "vrcp14", 
2839                    int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>;
2840
2841defm VRSQRT14  : avx512_fp_unop_s<0x4F, "vrsqrt14">,
2842                avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>,
2843                avx512_fp_unop_p_int<0x4E, "vrsqrt14", 
2844                    int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>;
2845
2846def : Pat<(int_x86_avx512_rsqrt14_ss VR128X:$src),
2847          (COPY_TO_REGCLASS (VRSQRT14SSZr (f32 (IMPLICIT_DEF)),
2848                                        (COPY_TO_REGCLASS VR128X:$src, FR32)),
2849                            VR128X)>;
2850def : Pat<(int_x86_avx512_rsqrt14_ss sse_load_f32:$src),
2851          (VRSQRT14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
2852
2853def : Pat<(int_x86_avx512_rcp14_ss VR128X:$src),
2854          (COPY_TO_REGCLASS (VRCP14SSZr (f32 (IMPLICIT_DEF)),
2855                                      (COPY_TO_REGCLASS VR128X:$src, FR32)),
2856                            VR128X)>;
2857def : Pat<(int_x86_avx512_rcp14_ss sse_load_f32:$src),
2858          (VRCP14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
2859
2860let AddedComplexity = 20, Predicates = [HasERI] in {
2861defm VRCP28   : avx512_fp_unop_s<0xCB, "vrcp28">,
2862                avx512_fp_unop_p<0xCA, "vrcp28", X86frcp>,
2863                avx512_fp_unop_p_int<0xCA, "vrcp28",
2864                    int_x86_avx512_rcp28_ps_512, int_x86_avx512_rcp28_pd_512>;
2865
2866defm VRSQRT28  : avx512_fp_unop_s<0xCD, "vrsqrt28">,
2867                avx512_fp_unop_p<0xCC, "vrsqrt28", X86frsqrt>,
2868                avx512_fp_unop_p_int<0xCC, "vrsqrt28",
2869                    int_x86_avx512_rsqrt28_ps_512, int_x86_avx512_rsqrt28_pd_512>;
2870}
2871
2872let Predicates = [HasERI] in {
2873  def : Pat<(int_x86_avx512_rsqrt28_ss VR128X:$src),
2874            (COPY_TO_REGCLASS (VRSQRT28SSZr (f32 (IMPLICIT_DEF)),
2875                                         (COPY_TO_REGCLASS VR128X:$src, FR32)),
2876                              VR128X)>;
2877  def : Pat<(int_x86_avx512_rsqrt28_ss sse_load_f32:$src),
2878            (VRSQRT28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
2879
2880  def : Pat<(int_x86_avx512_rcp28_ss VR128X:$src),
2881            (COPY_TO_REGCLASS (VRCP28SSZr (f32 (IMPLICIT_DEF)),
2882                                       (COPY_TO_REGCLASS VR128X:$src, FR32)),
2883                              VR128X)>;
2884  def : Pat<(int_x86_avx512_rcp28_ss sse_load_f32:$src),
2885            (VRCP28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
2886}
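// avx512_sqrt_packed - Packed square root, selected both through the fsqrt
// node and through the 512-bit sqrt intrinsics (the *_Int defs).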
2887multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2888                              Intrinsic V16F32Int, Intrinsic V8F64Int,
2889                              OpndItins itins_s, OpndItins itins_d> {
2890  def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2891             !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2892             [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
2893             EVEX, EVEX_V512;
2894
2895  let mayLoad = 1 in
2896  def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2897              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2898              [(set VR512:$dst,
2899                (OpNode (memopv16f32 addr:$src)))],
2900              itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
2901
2902  def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2903              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2904              [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
2905              EVEX, EVEX_V512;
2906
2907  let mayLoad = 1 in
2908    def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2909                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2910                [(set VR512:$dst, (OpNode
2911                  (memopv8f64 addr:$src)))],
2912                itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
2913
2914  def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2915                           !strconcat(OpcodeStr,
2916                                      "ps\t{$src, $dst|$dst, $src}"),
2917                           [(set VR512:$dst, (V16F32Int VR512:$src))]>, 
2918                           EVEX, EVEX_V512;
2919  def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2920                          !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2921                          [(set VR512:$dst, 
2922                           (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
2923                          EVEX_V512, EVEX_CD8<32, CD8VF>;
2924  def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
2925                           !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2926                           [(set VR512:$dst, (V8F64Int VR512:$src))]>, 
2927                           EVEX, EVEX_V512, VEX_W;
2928  def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
2929                         !strconcat(OpcodeStr,
2930                         "pd\t{$src, $dst|$dst, $src}"),
2931                         [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>,
2932                         EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2933}
2934
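// avx512_sqrt_scalar - Scalar square root (ss/sd).  The plain FR32X/FR64X
// forms carry no patterns and are selected by the fsqrt patterns below; the
// _Int forms match the intrinsics passed as F32Int/F64Int.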
2935multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
2936                          Intrinsic F32Int, Intrinsic F64Int,
2937                          OpndItins itins_s, OpndItins itins_d> {
2938  def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
2939               (ins FR32X:$src1, FR32X:$src2),
2940               !strconcat(OpcodeStr,
2941                          "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2942                      [], itins_s.rr>, XS, EVEX_4V;
2943  def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
2944               (ins VR128X:$src1, VR128X:$src2),
2945               !strconcat(OpcodeStr,
2946                "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2947               [(set VR128X:$dst, 
2948                 (F32Int VR128X:$src1, VR128X:$src2))],
2949               itins_s.rr>, XS, EVEX_4V;
2950  let mayLoad = 1 in {
2951  def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
2952               (ins FR32X:$src1, f32mem:$src2),
2953               !strconcat(OpcodeStr,
2954                          "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2955                      [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
2956  def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
2957                   (ins VR128X:$src1, ssmem:$src2),
2958                   !strconcat(OpcodeStr,
2959                 "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2960                   [(set VR128X:$dst, 
2961                     (F32Int VR128X:$src1, sse_load_f32:$src2))],
2962                   itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
2963  }
2964  def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
2965               (ins FR64X:$src1, FR64X:$src2),
2966               !strconcat(OpcodeStr,
2967                          "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, 
2968                      XD, EVEX_4V, VEX_W;
2969  def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
2970               (ins VR128X:$src1, VR128X:$src2),
2971               !strconcat(OpcodeStr,
2972                "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2973               [(set VR128X:$dst, 
2974                 (F64Int VR128X:$src1, VR128X:$src2))],
2975               itins_d.rr>, XD, EVEX_4V, VEX_W;
2976  let mayLoad = 1 in {
2977  def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
2978               (ins FR64X:$src1, f64mem:$src2),
2979               !strconcat(OpcodeStr,
2980                  "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, 
2981               XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
2982  def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
2983                  (ins VR128X:$src1, sdmem:$src2),
2984                   !strconcat(OpcodeStr,
2985                  "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2986                  [(set VR128X:$dst, 
2987                    (F64Int VR128X:$src1, sse_load_f64:$src2))]>, 
2988                  XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
2989  }
2990}
2991
2992
2993defm VSQRT  : avx512_sqrt_scalar<0x51, "sqrt", 
2994                int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, 
2995                SSE_SQRTSS, SSE_SQRTSD>,
2996              avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
2997                int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
2998                SSE_SQRTPS, SSE_SQRTPD>;
2999
3000let Predicates = [HasAVX512] in {
3001  def : Pat<(f32 (fsqrt FR32X:$src)),
3002            (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
3003  def : Pat<(f32 (fsqrt (load addr:$src))),
3004            (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
3005            Requires<[OptForSize]>;
3006  def : Pat<(f64 (fsqrt FR64X:$src)),
3007            (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
3008  def : Pat<(f64 (fsqrt (load addr:$src))),
3009            (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
3010            Requires<[OptForSize]>;
3011
3012  def : Pat<(f32 (X86frsqrt FR32X:$src)),
3013            (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
3014  def : Pat<(f32 (X86frsqrt (load addr:$src))),
3015            (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
3016            Requires<[OptForSize]>;
3017
3018  def : Pat<(f32 (X86frcp FR32X:$src)),
3019            (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
3020  def : Pat<(f32 (X86frcp (load addr:$src))),
3021            (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
3022            Requires<[OptForSize]>;
3023
3024  def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
3025            (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
3026                                        (COPY_TO_REGCLASS VR128X:$src, FR32)),
3027                              VR128X)>;
3028  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
3029            (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3030
3031  def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
3032            (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
3033                                        (COPY_TO_REGCLASS VR128X:$src, FR64)),
3034                              VR128X)>;
3035  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
3036            (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
3037}
3038
3039
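// avx512_fp_unop_rm - Packed operations taking an 8-bit immediate, selected
// through the supplied V4F32Int/V2F64Int intrinsics in both register and
// memory forms.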
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag32, PatFrag mem_frag64,
                            Intrinsic V4F32Int, Intrinsic V2F64Int,
                            CD8VForm VForm> {
let ExeDomain = SSEPackedSingle in {
  // Vector intrinsic operation, reg
  def PSr : AVX512AIi8<opcps, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;

  // Vector intrinsic operation, mem
  def PSm : AVX512AIi8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
                    EVEX_CD8<32, VForm>;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  // Vector intrinsic operation, reg
  def PDr : AVX512AIi8<opcpd, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;

  // Vector intrinsic operation, mem
  def PDm : AVX512AIi8<opcpd, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
                     EVEX_CD8<64, VForm>;
} // ExeDomain = SSEPackedDouble
}

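// Scalar forms with a rounding-control immediate: a plain register variant
// with no pattern, plus intrinsic register and memory variants for SS and SD.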
multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr,
                            Intrinsic F32Int,
                            Intrinsic F64Int> {
let ExeDomain = GenericDomain in {
  // Operation, reg.
  let hasSideEffects = 0 in
  def SSr : AVX512AIi8<opcss, MRMSrcReg,
      (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
      !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      []>;

  // Intrinsic operation, reg.
  def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
        (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;

  // Intrinsic operation, mem.
  def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
                     (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                   "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set VR128X:$dst, (F32Int VR128X:$src1,
                                         sse_load_f32:$src2, imm:$src3))]>,
                     EVEX_CD8<32, CD8VT1>;

  // Operation, reg.
  let hasSideEffects = 0 in
  def SDr : AVX512AIi8<opcsd, MRMSrcReg,
        (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, VEX_W;

  // Intrinsic operation, reg.
  def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
        (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
        VEX_W;

  // Intrinsic operation, mem.
  def SDm : AVX512AIi8<opcsd, MRMSrcMem,
        (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set VR128X:$dst,
              (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
        VEX_W, EVEX_CD8<64, CD8VT1>;
} // ExeDomain = GenericDomain
}

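// vrndscale: scalar SS/SD forms and 512-bit packed PS/PD forms.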
let Predicates = [HasAVX512] in {
  defm VRNDSCALE  : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale",
                              int_x86_avx512_rndscale_ss,
                              int_x86_avx512_rndscale_sd>, EVEX_4V;

  defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f512mem, VR512,
                                  memopv16f32, memopv8f64,
                                  int_x86_avx512_rndscale_ps_512,
                                  int_x86_avx512_rndscale_pd_512, CD8VF>,
                                  EVEX, EVEX_V512;
}

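// Lower the generic scalar rounding nodes to VRNDSCALE{SS,SD} with the
// corresponding immediate: 0x1 floor, 0x2 ceil, 0x3 trunc, 0x4 rint (current
// rounding mode), 0xC nearbyint (current mode, precision exceptions
// suppressed).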
def : Pat<(f32 (ffloor FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
def : Pat<(f64 (ffloor FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
def : Pat<(f32 (fnearbyint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
def : Pat<(f64 (fceil FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
def : Pat<(f32 (frint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;

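// The same lowering for the 512-bit packed forms.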
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x3))>;

def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x3))>;

//===----------------------------------------------------------------------===//
// Integer truncate and extend operations

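// Down-converting (truncating) moves from 512-bit vectors: plain register,
// zero-masked register and store forms. No patterns are attached here; the
// truncate patterns follow the instantiations below.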
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                          RegisterClass dstRC, RegisterClass srcRC,
                          RegisterClass KRC, X86MemOperand x86memop> {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins srcRC:$src),
               !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def krr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
               []>, EVEX, EVEX_KZ;

  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               []>, EVEX;
}
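// vpmov*: plain, signed-saturating (vpmovs*) and unsigned-saturating
// (vpmovus*) down-converts from qword and dword elements.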
defm VPMOVQB    : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB   : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB  : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVQW    : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW   : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW  : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVQD    : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD   : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD  : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVDW    : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW   : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW  : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVDB    : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB   : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB  : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;

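// Select unmasked and masked vector truncates to the down-converting moves.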
def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQBrr  VR512:$src)>;
def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQWrr  VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr  VR512:$src)>;
def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr  VR512:$src)>;
def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQDrr  VR512:$src)>;

def : Pat<(v16i8  (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
                  (VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
                  (VPMOVDWkrr VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
                  (VPMOVQWkrr  VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
                  (VPMOVQDkrr  VK8WM:$mask, VR512:$src)>;


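// Zero- and sign-extending moves from 128/256-bit sources into 512-bit
// destinations, with register and (bitcast) memory forms.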
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
                      RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
                      X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;
}

defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
                             memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
                             EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
                             memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
                             EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
                             memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
                             EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
                             memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
                             EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
                             memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
                             EVEX_CD8<32, CD8VH>;

defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
                             memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
                             EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
                             memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
                             EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
                             memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
                             EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
                             memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
                             EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
                             memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
                             EVEX_CD8<32, CD8VH>;

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

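// Masked gather: the destination is early-clobber and tied to $src1, and the
// mask register is written back as the completion mask ($mask_wb).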
multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                       RegisterClass RC, X86MemOperand memop> {
let mayLoad = 1,
  Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
            (ins RC:$src1, KRC:$mask, memop:$src2),
            !strconcat(OpcodeStr,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K;
}
defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
                                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
                                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512,  vy64xmem>,
                                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512,  vz64mem>,
                                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X,  vz64mem>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT1>;

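// Masked scatter: stores the selected elements and writes the updated mask
// back through $mask_wb.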
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                       RegisterClass RC, X86MemOperand memop> {
let mayStore = 1, Constraints = "$mask = $mask_wb" in
  def mr  : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
            (ins memop:$dst, KRC:$mask, RC:$src2),
            !strconcat(OpcodeStr,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K;
}

defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
                                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
                                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
                                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
                                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
                                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
                                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
                                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
                                  EVEX_V512, EVEX_CD8<32, CD8VT1>;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations

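// 512-bit shuffles with an 8-bit immediate selector. The integer patterns
// below reuse the FP encodings.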
multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
                      ValueType vt, string OpcodeStr, PatFrag mem_frag,
                      Domain d> {
  def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                   !strconcat(OpcodeStr,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                       (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
                   EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
  def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, i8imm:$src3),
                   !strconcat(OpcodeStr,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                   [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                       (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
                   EVEX_4V, Sched<[WriteShuffle]>;
}

defm VSHUFPSZ  : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
                  SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ  : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
                  SSEPackedDouble>, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

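// Integer shuffles map onto the same VSHUFPS/VSHUFPD instructions.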
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1,
                    (memopv16i32 addr:$src2), (i8 imm:$imm))),
          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;

def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1,
                            (memopv8i64 addr:$src2), (i8 imm:$imm))),
          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;

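// valignd/valignq: element-granular rotate of the concatenated sources by the
// immediate. The X86PAlignr patterns below swap the two sources accordingly.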
multiclass avx512_alignr<string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop> {
  def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, i8imm:$src3),
                     !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;
  let mayLoad = 1 in
  def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                     !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;
}
defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>,
                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;

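// vpabsd/vpabsq: packed absolute value. Only register and memory forms are
// defined here; no ISel patterns are attached.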
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop> {
  def rr  : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                    EVEX;
  def rm  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                   EVEX;
}

defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512,
                        EVEX_CD8<32, CD8VF>;
defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
                        EVEX_CD8<64, CD8VF>;

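// vpconflict (CDI): unmasked, zero-masked and merge-masked forms, each with
// register, memory and broadcast-memory variants.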
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                        RegisterClass RC, RegisterClass KRC, PatFrag memop_frag,
                        X86MemOperand x86memop, PatFrag scalar_mfrag,
                        X86MemOperand x86scalar_mop, string BrdcstStr,
                        Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src),
       !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
       [(set RC:$dst, (Int RC:$src))]>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86memop:$src),
       !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
       [(set RC:$dst, (Int (memop_frag addr:$src)))]>, EVEX;
  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86scalar_mop:$src),
       !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
       []>, EVEX, EVEX_B;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src),
       !strconcat(OpcodeStr,
                  "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       [(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src),
       !strconcat(OpcodeStr,
                  "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       [(set RC:$dst, (maskzInt KRC:$mask, (memop_frag addr:$src)))]>,
       EVEX, EVEX_KZ;
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86scalar_mop:$src),
       !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
                  BrdcstStr, "}"),
       []>, EVEX, EVEX_KZ, EVEX_B;

  let Constraints = "$src1 = $dst" in {
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, RC:$src2),
       !strconcat(OpcodeStr,
                  "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86memop:$src2),
       !strconcat(OpcodeStr,
                  "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, (memop_frag addr:$src2)))]>, EVEX, EVEX_K;
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
       []>, EVEX, EVEX_K, EVEX_B;
  }
}

let Predicates = [HasCDI] in {
defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                    int_x86_avx512_conflict_d_512,
                    int_x86_avx512_conflict_d_mask_512,
                    int_x86_avx512_conflict_d_maskz_512>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                    int_x86_avx512_conflict_q_512,
                    int_x86_avx512_conflict_q_mask_512,
                    int_x86_avx512_conflict_q_maskz_512>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3527