// X86InstrSSE.td revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Itinerary pair for the register-register (rr) and register-memory (rm)
// forms of an instruction, plus the scheduling-model write type used by the
// machine scheduler.
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  // InstrSchedModel info.
  X86FoldableSchedWrite Sched = WriteFAdd;
}

// Pairs the single-precision (s) and double-precision (d) itineraries of an
// operation so multiclasses can select by element size.
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
  OpndItins s = arg_s;
  OpndItins d = arg_d;
}

// Itineraries for vector shifts: reg-reg, reg-mem and reg-imm forms.
class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
                     InstrItinClass arg_ri> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  InstrItinClass ri = arg_ri;
}


// scalar
let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
  IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;

def SSE_ALU_F64S : OpndItins<
  IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
}

def SSE_ALU_ITINS_S : SizeItins<
  SSE_ALU_F32S, SSE_ALU_F64S
>;

let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
  // NOTE(review): fixed copy-paste typo; the rm form used the F64S itinerary.
  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;

def SSE_MUL_F64S : OpndItins<
  IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
}

def SSE_MUL_ITINS_S : SizeItins<
  SSE_MUL_F32S, SSE_MUL_F64S
>;

let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
  // NOTE(review): fixed copy-paste typo; the rm form used the F64S itinerary.
  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;

def SSE_DIV_F64S : OpndItins<
  IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
}

def SSE_DIV_ITINS_S : SizeItins<
  SSE_DIV_F32S, SSE_DIV_F64S
>;

// parallel
let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;

def SSE_ALU_F64P : OpndItins<
  IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
}

def SSE_ALU_ITINS_P : SizeItins<
  SSE_ALU_F32P, SSE_ALU_F64P
>;

let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
  // NOTE(review): fixed copy-paste typo; the rm form used the F64P itinerary.
  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;

def SSE_MUL_F64P : OpndItins<
  IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
}

def SSE_MUL_ITINS_P : SizeItins<
  SSE_MUL_F32P, SSE_MUL_F64P
>;

let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
  // NOTE(review): fixed copy-paste typo; the rm form used the F64P itinerary.
  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;

def SSE_DIV_F64P : OpndItins<
  IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
}

def SSE_DIV_ITINS_P : SizeItins<
  SSE_DIV_F32P, SSE_DIV_F64P
>;

let Sched = WriteVecLogic in
def SSE_VEC_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

def SSE_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

def SSE_INTALUQ_ITINS_P : OpndItins<
  IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
}

let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
  IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;

def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
  IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
>;

def SSE_MOVA_ITINS : OpndItins<
  IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
>;

def SSE_MOVU_ITINS : OpndItins<
  IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;

def SSE_DPPD_ITINS : OpndItins<
  IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
>;

def SSE_DPPS_ITINS : OpndItins<
  // NOTE(review): fixed copy-paste typo; the rm form used IIC_SSE_DPPD_RM.
  IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM
>;

def DEFAULT_ITINS : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

def SSE_EXTRACT_ITINS : OpndItins<
  IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
>;

def SSE_INSERT_ITINS : OpndItins<
  IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
>;

let Sched = WriteMPSAD in
def SSE_MPSADBW_ITINS : OpndItins<
  IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
>;

def SSE_PMULLD_ITINS : OpndItins<
  IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
>;

// Definitions for backward compatibility.
// The instructions mapped on these definitions use a different itinerary
// than the actual scheduling model.
let Sched = WriteShuffle in
def DEFAULT_ITINS_SHUFFLESCHED :  OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteVecIMul in
def DEFAULT_ITINS_VECIMULSCHED :  OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteShuffle in
def SSE_INTALU_ITINS_SHUFF_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

let Sched = WriteMPSAD in
def DEFAULT_ITINS_MPSADSCHED :  OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteFBlend in
def DEFAULT_ITINS_FBLENDSCHED :  OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteBlend in
def DEFAULT_ITINS_BLENDSCHED :  OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteFBlend in
def SSE_INTALU_ITINS_FBLEND_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           OpndItins itins,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
     !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
     [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
     Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               Operand memopr, ComplexPattern mem_cpat,
                               OpndItins itins,
                               bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                          SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, mem_cpat:$src2))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       Sched<[itins.Sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          itins.rm, d>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, NoItinerary, d>,
       Sched<[WriteVecLogic]>;
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
     !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
     pat_rm, NoItinerary, d>,
     Sched<[WriteVecLogicLd, ReadAfterLd]>;
}

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;

// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;

// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
}

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasSSE2] in {
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX] in {
  def : Pat<(v4f64  (bitconvert (v8f32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))),  (v16i16 VR256:$src)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;


// The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8f32 immAllZerosV))]>;
}

let Predicates = [HasAVX] in
  def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;

let Predicates = [HasAVX2] in {
  def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}

// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
let Predicates = [HasAVX1Only] in {
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;

def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;

def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;

def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and just mentioned, we
// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
                         X86MemOperand x86memop, string base_opc,
                         string asm_opr> {
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, RC:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1,
                                 (scalar_to_vector RC:$src2))))],
              IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, RC:$src2),
                  !strconcat(base_opc, asm_opr),
                  [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
}

multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr> {
  // AVX
  defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                              VEX_4V, VEX_LIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
                     VEX, VEX_LIG, Sched<[WriteStore]>;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}">;
  }

  def NAME#mr   : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
                     Sched<[WriteStore]>;
}

// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_pat, string OpcodeStr> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (mem_pat addr:$src))],
                     IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
  def NAME#rm   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (mem_pat addr:$src))],
                     IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;

  let AddedComplexity = 20 in
    defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVS{S,D} to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSrr (v4f32 (V_SET0)),
                       (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSrr (v4i32 (V_SET0)),
                       (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;

  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
  }
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0),
                           (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
                           sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0),
                           (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
                           sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDrr (v2f64 (V_SET0)),
                       (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDrr (v2i64 (V_SET0)),
                       (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4i32 VR128:$src1),
                      (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4f32 VR128:$src1),
                      (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;

  // 256-bit variants
  def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
              sub_xmm)>;
  def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
              sub_xmm)>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;

  // 256-bit variants
  def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
              sub_xmm)>;
  def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
              sub_xmm)>;


  // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
  // is during lowering, where it's not possible to recognize the fold cause
  // it has two uses through a bitcast. One use disappears at isel time and the
  // fold opportunity reappears.
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}

let Predicates = [UseSSE1] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm already zeros the high parts of the register.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
  }

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;

  // Shuffle with MOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}

let Predicates = [UseSSE2] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSD to the lower bits.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
  }

  let AddedComplexity = 20 in {
  // MOVSDrm already zeros the high parts of the register.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  }

  // Extract and store.
809 def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), 810 addr:$dst), 811 (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; 812 813 // Shuffle with MOVSD 814 def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), 815 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 816 def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 817 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 818 def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), 819 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 820 def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), 821 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 822 823 // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem 824 // is during lowering, where it's not possible to recognize the fold cause 825 // it has two uses through a bitcast. One use disappears at isel time and the 826 // fold opportunity reappears. 827 def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), 828 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 829 def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), 830 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 831 def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), 832 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 833 def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), 834 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 835} 836 837//===----------------------------------------------------------------------===// 838// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions 839//===----------------------------------------------------------------------===// 840 841multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, 842 X86MemOperand x86memop, PatFrag ld_frag, 843 string asm, Domain d, 844 OpndItins itins, 845 bit IsReMaterializable = 1> { 846let neverHasSideEffects = 1 in 847 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 848 
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, 849 Sched<[WriteFShuffle]>; 850let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in 851 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 852 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 853 [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, 854 Sched<[WriteLoad]>; 855} 856 857defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, 858 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 859 PS, VEX; 860defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, 861 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 862 PD, VEX; 863defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, 864 "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 865 PS, VEX; 866defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, 867 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 868 PD, VEX; 869 870defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, 871 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 872 PS, VEX, VEX_L; 873defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, 874 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 875 PD, VEX, VEX_L; 876defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, 877 "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 878 PS, VEX, VEX_L; 879defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, 880 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 881 PD, VEX, VEX_L; 882defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, 883 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 884 PS; 885defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, 886 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 887 PD; 888defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, 889 "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 890 PS; 891defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, 892 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 893 PD; 894 895let SchedRW 
= [WriteStore] in { 896def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 897 "movaps\t{$src, $dst|$dst, $src}", 898 [(alignedstore (v4f32 VR128:$src), addr:$dst)], 899 IIC_SSE_MOVA_P_MR>, VEX; 900def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 901 "movapd\t{$src, $dst|$dst, $src}", 902 [(alignedstore (v2f64 VR128:$src), addr:$dst)], 903 IIC_SSE_MOVA_P_MR>, VEX; 904def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 905 "movups\t{$src, $dst|$dst, $src}", 906 [(store (v4f32 VR128:$src), addr:$dst)], 907 IIC_SSE_MOVU_P_MR>, VEX; 908def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 909 "movupd\t{$src, $dst|$dst, $src}", 910 [(store (v2f64 VR128:$src), addr:$dst)], 911 IIC_SSE_MOVU_P_MR>, VEX; 912def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 913 "movaps\t{$src, $dst|$dst, $src}", 914 [(alignedstore256 (v8f32 VR256:$src), addr:$dst)], 915 IIC_SSE_MOVA_P_MR>, VEX, VEX_L; 916def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 917 "movapd\t{$src, $dst|$dst, $src}", 918 [(alignedstore256 (v4f64 VR256:$src), addr:$dst)], 919 IIC_SSE_MOVA_P_MR>, VEX, VEX_L; 920def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 921 "movups\t{$src, $dst|$dst, $src}", 922 [(store (v8f32 VR256:$src), addr:$dst)], 923 IIC_SSE_MOVU_P_MR>, VEX, VEX_L; 924def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 925 "movupd\t{$src, $dst|$dst, $src}", 926 [(store (v4f64 VR256:$src), addr:$dst)], 927 IIC_SSE_MOVU_P_MR>, VEX, VEX_L; 928} // SchedRW 929 930// For disassembler 931let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 932 SchedRW = [WriteFShuffle] in { 933 def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), 934 (ins VR128:$src), 935 "movaps\t{$src, $dst|$dst, $src}", [], 936 IIC_SSE_MOVA_P_RR>, VEX; 937 def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, 
(outs VR128:$dst), 938 (ins VR128:$src), 939 "movapd\t{$src, $dst|$dst, $src}", [], 940 IIC_SSE_MOVA_P_RR>, VEX; 941 def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), 942 (ins VR128:$src), 943 "movups\t{$src, $dst|$dst, $src}", [], 944 IIC_SSE_MOVU_P_RR>, VEX; 945 def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), 946 (ins VR128:$src), 947 "movupd\t{$src, $dst|$dst, $src}", [], 948 IIC_SSE_MOVU_P_RR>, VEX; 949 def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), 950 (ins VR256:$src), 951 "movaps\t{$src, $dst|$dst, $src}", [], 952 IIC_SSE_MOVA_P_RR>, VEX, VEX_L; 953 def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), 954 (ins VR256:$src), 955 "movapd\t{$src, $dst|$dst, $src}", [], 956 IIC_SSE_MOVA_P_RR>, VEX, VEX_L; 957 def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), 958 (ins VR256:$src), 959 "movups\t{$src, $dst|$dst, $src}", [], 960 IIC_SSE_MOVU_P_RR>, VEX, VEX_L; 961 def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), 962 (ins VR256:$src), 963 "movupd\t{$src, $dst|$dst, $src}", [], 964 IIC_SSE_MOVU_P_RR>, VEX, VEX_L; 965} 966 967let Predicates = [HasAVX] in { 968def : Pat<(v8i32 (X86vzmovl 969 (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))), 970 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 971def : Pat<(v4i64 (X86vzmovl 972 (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), 973 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 974def : Pat<(v8f32 (X86vzmovl 975 (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), 976 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 977def : Pat<(v4f64 (X86vzmovl 978 (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), 979 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 980} 981 982 983def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), 984 (VMOVUPSYmr addr:$dst, VR256:$src)>; 985def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), 986 (VMOVUPDYmr addr:$dst, VR256:$src)>; 987 
// Legacy (non-VEX) SSE packed register-to-memory stores.
let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteMove] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
}

// Unaligned-store intrinsics: VEX forms when AVX is present, legacy
// encodings otherwise.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (VMOVUPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE1] in
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (MOVUPSmr addr:$dst, VR128:$src)>;
let Predicates = [UseSSE2] in
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (MOVUPDmr addr:$dst, VR128:$src)>;

// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
  // 128-bit load/store
  def : Pat<(alignedloadv2i64 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (VMOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit load/store
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // Special patterns for storing subvector extracts of lower 128-bits
  // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  def : Pat<(alignedstore (v2f64 (extract_subvector
                                  (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector
                                  (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector
                                  (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector
                                  (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector
                                  (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector
                                  (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;

  def : Pat<(store (v2f64 (extract_subvector
                           (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector
                           (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector
                           (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector
                           (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v8i16 (extract_subvector
                           (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector
                           (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                         "movaps\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                         "movapd\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                       "movaps\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
  def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                       "movapd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
}
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Shared body for MOVLPS/MOVLPD and MOVHPS/MOVHPD memory forms.
//   psnode/pdnode - DAG nodes matched for the "s"/"d" variants.
//   base_opc      - mnemonic stem ("movlp"/"movhp"); "s"/"d" is appended.
//   asm_opr       - operand string (2-operand SSE or 3-operand AVX form).
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
                                      string base_opc, string asm_opr,
                                      InstrItinClass itin> {
  def PSrm : PI<opc, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "s", asm_opr),
     [(set VR128:$dst,
       (psnode VR128:$src1,
              (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
              itin, SSEPackedSingle>, PS,
     Sched<[WriteFShuffleLd, ReadAfterLd]>;

  def PDrm : PI<opc, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "d", asm_opr),
     [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))))],
              itin, SSEPackedDouble>, PD,
     Sched<[WriteFShuffleLd, ReadAfterLd]>;

}

// Instantiates both the VEX (3-operand) and legacy (tied-operand) forms.
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
                                 string base_opc, InstrItinClass itin> {
  defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                    itin>, VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                                    "\t{$src2, $dst|$dst, $src2}",
                                    itin>;
}

let AddedComplexity = 20 in {
  defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
                                    IIC_SSE_MOV_LH>;
}

// MOVLPS/MOVLPD store forms: write the low 64 bits of the xmm register.
let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // Shuffle with VMOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;

  // Shuffle with VMOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE1] in {
  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
  def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
                                 (iPTR 0))), addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;

  // Shuffle with MOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlps VR128:$src1,
                      (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                                      addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                              addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE2] in {
  // Shuffle with MOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                           addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                           addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

let AddedComplexity = 20 in {
  defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
                                    IIC_SSE_MOV_LH>;
}

let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // cause it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // MOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2] in {
  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // cause it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let AddedComplexity = 20, Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>,
                      VEX_4V, Sched<[WriteFShuffle]>;
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>,
                      VEX_4V, Sched<[WriteFShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
}

let Predicates = [UseAVX] in {
  // MOVLHPS patterns
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE1] in {
  // MOVLHPS patterns
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

// Itineraries for the conversion instruction groups below. SSE_CVT_PD has
// no explicit Sched override, so it keeps OpndItins' default (WriteFAdd).
def SSE_CVT_PD : OpndItins<
  IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;

let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
  IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;

let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
  IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;

let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
  IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;

let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
  IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;

let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
  IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;

// Scalar conversion with a pattern: rr form converts a register, rm form
// converts a loaded value (ld_frag).
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                     string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
                        [(set DstRC:$dst, (OpNode SrcRC:$src))],
                        itins.rr>, Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
                        itins.rm>, Sched<[itins.Sched.Folded]>;
}

multiclass
sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
            X86MemOperand x86memop, string asm, Domain d,
            OpndItins itins> {
// Packed conversion: no patterns here (selection is done elsewhere), so the
// defs carry no side effects and the rm form only marks mayLoad.
let neverHasSideEffects = 1 in {
  def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
             [], itins.rr, d>, Sched<[itins.Sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
             [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}

// AVX 3-operand scalar int-to-fp conversion; $src1 passes through the upper
// elements, so it uses DstRC for both destination and pass-through operands.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1, Predicates = [UseAVX] in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[WriteCvtI2F]>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // neverHasSideEffects = 1
}

let Predicates = [UseAVX] in {
defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_32>,
                                XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_64>,
                                XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                                XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                                XD, VEX, VEX_W, VEX_LIG;

def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
                                  XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
                                  XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
                                  XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
                                  XD, VEX_4V, VEX_W, VEX_LIG;

let Predicates = [UseAVX] in {
  // NOTE(review): the vcvtsi2ss alias below names FR64 operands even though
  // VCVTSI2SSrm is instantiated with FR32 — the registers are the same XMM
  // set, so matching still works, but verify the register class is intended.
  def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
  def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;

  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (sint_to_fp GR64:$src)),
            (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (sint_to_fp GR64:$src)),
            (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
                      "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
                      "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
                      "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XD, REX_W;

def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
(CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; 1616def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1617 (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>; 1618def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1619 (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; 1620def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1621 (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; 1622def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1623 (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; 1624def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1625 (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; 1626def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1627 (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; 1628 1629def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", 1630 (CVTSI2SSrm FR64:$dst, i32mem:$src)>; 1631def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", 1632 (CVTSI2SDrm FR64:$dst, i32mem:$src)>; 1633 1634// Conversion Instructions Intrinsics - Match intrinsics which expect MM 1635// and/or XMM operand(s). 

// sse12_cvt_sint - match intrinsics that take/produce an XMM register and a
// GPR (e.g. cvtsd2si): rr form reads a register, rm form folds a load via
// the mem_cpat complex pattern.
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
                         string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
              Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
              Sched<[itins.Sched.Folded]>;
}

// sse12_cvt_sint_3addr - like sse12_cvt_sint but with a tied/merged first
// source (cvtsi2ss/cvtsi2sd style).  Is2Addr selects the 2-operand SSE
// assembly string vs. the 3-operand AVX one.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                    PatFrag ld_frag, string asm, OpndItins itins,
                    bit Is2Addr = 1> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
              itins.rr>, Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
              itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
                  int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
                  SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
                    int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
                    SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                 sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
                   sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;


let isCodeGenOnly = 1 in {
  let Predicates = [UseAVX] in {
  defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, VEX_4V;
  defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W;
  defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, VEX_4V;
  defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W;
  }
  let Constraints = "$src1 = $dst" in {
    defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          int_x86_sse_cvtsi2ss, i32mem, loadi32,
                          "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
    defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          int_x86_sse_cvtsi642ss, i64mem, loadi64,
                          "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
    defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          int_x86_sse2_cvtsi2sd, i32mem, loadi32,
                          "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
    defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          int_x86_sse2_cvtsi642sd, i64mem, loadi64,
                          "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
  }
} // isCodeGenOnly = 1

/// SSE 1 Only

// Aliases for intrinsics
let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                     ssmem, sse_load_f32, "cvttss2si",
                                     SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                       int_x86_sse_cvttss2si64, ssmem,
                                       sse_load_f32,
                                       "cvttss2si", SSE_CVT_SS2SI_64>,
                                       XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                     sdmem, sse_load_f64, "cvttsd2si",
                                     SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                       int_x86_sse2_cvttsd2si64, sdmem,
                                       sse_load_f64, "cvttsd2si",
                                       SSE_CVT_SD2SI>, XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                    ssmem, sse_load_f32, "cvttss2si",
                                    SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse_cvttss2si64, ssmem,
                                      sse_load_f32, "cvttss2si",
                                      SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                    sdmem, sse_load_f64, "cvttsd2si",
                                    SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse2_cvttsd2si64, sdmem,
                                      sse_load_f64, "cvttsd2si",
                                      SSE_CVT_SD2SI>, XD, REX_W;
} // isCodeGenOnly = 1

let Predicates = [UseAVX] in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                                ssmem, sse_load_f32, "cvtss2si",
                                SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
}
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                               ssmem, sse_load_f32, "cvtss2si",
                               SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 SSE_CVT_SS2SI_64>, XS, REX_W;

defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                             "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                             SSEPackedSingle, SSE_CVT_PS>,
                             PS, VEX, Requires<[HasAVX]>;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, SSE_CVT_PS>,
                              PS, VEX, VEX_L, Requires<[HasAVX]>;

defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle, SSE_CVT_PS>,
                            PS, Requires<[UseSSE2]>;

let Predicates = [UseAVX] in {
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
}

def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
// NOTE(review): added the trailing ", 0" (suppress-printing flag) that all
// sibling aliases carry; without it this alias would be used for printing.
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;

/// SSE 2 Only

// Convert scalar double to scalar single
let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR64:$src1, FR64:$src2),
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                        IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
                        Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                     (ins FR64:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RM>,
                     XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
                     Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
          Requires<[UseAVX]>;

def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))],
                      IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))],
                    IIC_SSE_CVT_Scalar_RM>,
                    XD,
                    Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

let isCodeGenOnly = 1 in {
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2F]>;
// NOTE(review): was MRMSrcReg despite the sdmem operand; a memory-form
// instruction must use the MRMSrcMem encoding form.
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;

let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                      IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtF2F]>;
// NOTE(review): was MRMSrcReg despite the sdmem operand; see above.
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                         VR128:$src1, sse_load_f64:$src2))],
                      IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
} // isCodeGenOnly = 1

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR32:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RR>,
                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
                    Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR32:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RM>,
                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

def : Pat<(f64 (fextend FR32:$src)),
          (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;

def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
          Requires<[UseAVX, OptForSpeed]>;

def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))],
                   IIC_SSE_CVT_Scalar_RR>, XS,
                   Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))],
                   IIC_SSE_CVT_Scalar_RM>, XS,
                   Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

// extload f32 -> f64.  This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
def : Pat<(fextend (loadf32 addr:$src)),
          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(extloadf32 addr:$src),
          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;

let isCodeGenOnly = 1 in {
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
                    Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
                    IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
                    Sched<[WriteCvtF2F]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
                    IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
} // isCodeGenOnly = 1

// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
                       IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
                       IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
                        IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
                        IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
                     IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
                     IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;


// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                       VEX, Sched<[WriteCvtF2I]>;

// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX,
                       Sched<[WriteCvtF2ILd]>;

// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
                       Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
                       VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}

def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;

// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvttps2dq VR128:$src))],
                         IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
                                            (loadv4f32 addr:$src)))],
                         IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
                                             (loadv8f32 addr:$src)))],
                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
                          Sched<[WriteCvtF2ILd]>;

def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
                       IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
                       IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;

let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (VCVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQrm addr:$src)>;

  def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
            (VCVTDQ2PSYrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
            (VCVTTPS2DQYrr VR256:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQYrm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (CVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
            (CVTTPS2DQrm addr:$src)>;
}

def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttpd2dq VR128:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.

// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttpd2dqx\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                            (loadv2f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;

// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;

let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
            (VCVTTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                         (memopv2f64 addr:$src)))],
                      IIC_SSE_CVT_PD_RM>,
                      Sched<[WriteCvtF2ILd]>;

// Convert packed single to packed double
let Predicates = [HasAVX] in {
                  // SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                    IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                    IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}

let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                   IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                   IIC_SSE_CVT_PD_RM>, PS, Sched<[WriteCvtF2FLd]>;
}

// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
                        Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256
                           (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
                        Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
                        Sched<[WriteCvtI2F]>;
}

// NOTE(review): the RR/RM itineraries on the two defs below were swapped
// (rm carried IIC_SSE_CVT_PD_RR and rr carried IIC_SSE_CVT_PD_RM); corrected.
let neverHasSideEffects = 1, mayLoad = 1 in
def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
                       IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2F]>;

// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// 128-bit VEX form. The YMM-source variants and the InstAliases below exist
// to disambiguate the assembly when only operand sizes differ.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                       IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;

// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2psx\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;

// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;

// SSE2 (non-VEX) forms; note the rm form uses memop (alignment-checked),
// unlike the VEX form's loadv2f64.
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;


// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
            (VCVTDQ2PSYrm addr:$src)>;

  // Match fround and fextend for 128/256-bit conversions
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (VCVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PSXrm addr:$src)>;
  def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
            (VCVTPD2PSYrr VR256:$src)>;
  def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
            (VCVTPD2PSYrm addr:$src)>;

  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (VCVTPS2PDrr VR128:$src)>;
  def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
            (VCVTPS2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDYrm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  // Match fround and fextend for 128 conversions
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (CVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
            (CVTPD2PSrm addr:$src)>;

  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (CVTPS2PDrr VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt,
                            OpndItins itins> {
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                itins.rr>, Sched<[itins.Sched]>;
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                       (ld_frag addr:$src2), imm:$cc))],
                itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
                      IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
    let mayLoad = 1 in
    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
                      IIC_SSE_ALU_F32S_RM>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S>,
                 XS, VEX_4V, VEX_LIG;
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S>, // same latency as 32 bit compare
                 XD, VEX_4V, VEX_LIG;

let Constraints = "$src1 = $dst" in {
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
                  XS;
  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SSE_ALU_F64S>,
                  XD;
}

multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                                Intrinsic Int, string asm, OpndItins itins> {
  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       VR128:$src, imm:$cc))],
                itins.rr>,
           Sched<[itins.Sched]>;
  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       (load addr:$src), imm:$cc))],
                itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let isCodeGenOnly = 1 in {
  // Aliases to match intrinsics which expect XMM operand(s).
  defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
                       "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                       SSE_ALU_F32S>,
                       XS, VEX_4V;
  defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
                       "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                       SSE_ALU_F32S>, // same latency as f32
                       XD, VEX_4V;
  let Constraints = "$src1 = $dst" in {
    defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        SSE_ALU_F32S>, XS;
    defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                        SSE_ALU_F64S>,
                        XD;
  }
}


// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                         ValueType vt, X86MemOperand x86memop,
                         PatFrag ld_frag, string OpcodeStr> {
  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
             IIC_SSE_COMIS_RR>,
          Sched<[WriteFAdd]>;
  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1),
                                   (ld_frag addr:$src2)))],
             IIC_SSE_COMIS_RM>,
          Sched<[WriteFAddLd, ReadAfterLd]>;
}

let Defs = [EFLAGS] in {
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss">, PS, VEX, VEX_LIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                                "ucomisd">, PD, VEX, VEX_LIG;
  let Pattern = []<dag> in {
    defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                 "comiss">, PS, VEX, VEX_LIG;
    defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                 "comisd">, PD, VEX, VEX_LIG;
  }

  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                      load, "ucomiss">, PS, VEX;
    defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                      load, "ucomisd">, PD, VEX;

    defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
                                     load, "comiss">, PS, VEX;
    defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
                                     load, "comisd">, PD, VEX;
  }
  defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                               "ucomiss">, PS;
  defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                               "ucomisd">, PD;

  let Pattern = []<dag> in {
    defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                "comiss">, PS;
    defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                "comisd">, PD;
  }

  let isCodeGenOnly = 1 in {
    defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                     load, "ucomiss">, PS;
    defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                     load, "ucomisd">, PD;

    defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
                                    "comiss">, PS;
    defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
                                    "comisd">, PD;
  }
} // Defs = [EFLAGS]

// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, Intrinsic Int, string asm,
                            string asm_alt, Domain d,
                            OpndItins itins = SSE_ALU_F32P> {
  def rri : PIi8<0xC2, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
             [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
             itins.rr, d>,
            Sched<[WriteFAdd]>;
  def rmi : PIi8<0xC2, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
             [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
             itins.rm, d>,
            Sched<[WriteFAddLd, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : PIi8<0xC2, MRMSrcReg,
               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
    // NOTE(fix): mark the pattern-less memory form as a load, matching the
    // rm_alt form in sse12_cmp_scalar above; with hasSideEffects = 0 and no
    // pattern, mayLoad cannot be inferred.
    let mayLoad = 1 in
    def rmi_alt : PIi8<0xC2, MRMSrcMem,
               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               asm_alt, [], itins.rm, d>,
               Sched<[WriteFAddLd, ReadAfterLd]>;
  }
}

defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, PS, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, PD, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
  defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
                 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedSingle, SSE_ALU_F32P>, PS;
  defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
                 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedDouble, SSE_ALU_F64P>, PD;
}

let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;

def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}

let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}

let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         Domain d, bit IsConvertibleToThreeAddress = 0> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                     (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
                 Sched<[WriteFShuffleLd, ReadAfterLd]>;
  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                     (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
                 Sched<[WriteFShuffle]>;
}

defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f32, SSEPackedSingle>, PS, VEX_4V;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv2f64, SSEPackedDouble>, PD, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>, PS;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
    memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>, PD;
}

// Integer-typed shuffles are matched onto the FP shuffle instructions.
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Shufp VR128:$src1,
                    (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;

  def : Pat<(v2i64 (X86Shufp VR128:$src1,
                    (loadv2i64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;

  // 256-bit patterns
  def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v8i32 (X86Shufp VR256:$src1,
                    (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;

  def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v4i64 (X86Shufp VR256:$src1,
                    (loadv4i64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(v4i32 (X86Shufp VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
            (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
}

let Predicates = [UseSSE2] in {
  // Generic SHUFPD patterns
  def : Pat<(v2i64 (X86Shufp VR128:$src1,
                    (memopv2i64 addr:$src2), (i8 imm:$imm))),
            (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   Domain d> {
  def rr : PI<opc, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2),
              asm, [(set RC:$dst,
                         (vt (OpNode RC:$src1, RC:$src2)))],
              IIC_SSE_UNPCK, d>, Sched<[WriteFShuffle]>;
  def rm : PI<opc, MRMSrcMem,
              (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              asm, [(set RC:$dst,
                         (vt (OpNode RC:$src1,
                                     (mem_frag addr:$src2))))],
              IIC_SSE_UNPCK, d>,
              Sched<[WriteFShuffleLd, ReadAfterLd]>;
}

defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, VEX_4V;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, VEX_4V;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, VEX_4V;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, VEX_4V;

defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, VEX_4V, VEX_L;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
        SSEPackedSingle>, PS;
  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
        SSEPackedDouble>, PD;
  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
        SSEPackedSingle>, PS;
  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
        SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"

let Predicates = [HasAVX1Only] in {
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}

let Predicates = [HasAVX] in {
  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
  // problem is during lowering, where it's not possible to recognize the load
  // fold cause it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (VUNPCKLPDrr VR128:$src, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
  // problem is during lowering, where it's not possible to recognize the load
  // fold cause it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (UNPCKLPDrr VR128:$src, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                Domain d> {
  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
              Sched<[WriteVecLogic]>;
}

let Predicates = [HasAVX] in {
  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
                                        "movmskps", SSEPackedSingle>, PS, VEX;
  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
                                        "movmskpd", SSEPackedDouble>, PD, VEX;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
                                         "movmskps", SSEPackedSingle>, PS,
                                         VEX, VEX_L;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
                                         "movmskpd", SSEPackedDouble>, PD,
                                         VEX, VEX_L;

  def : Pat<(i32 (X86fgetsign FR32:$src)),
            (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR32:$src)),
            (SUBREG_TO_REG (i64 0),
             (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>;
  def : Pat<(i32 (X86fgetsign FR64:$src)),
            (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR64:$src)),
            (SUBREG_TO_REG (i64 0),
             (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;
}

defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
                                     SSEPackedSingle>, PS;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
                                     SSEPackedDouble>, PD;

2804def : Pat<(i32 (X86fgetsign FR32:$src)), 2805 (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>, 2806 Requires<[UseSSE1]>; 2807def : Pat<(i64 (X86fgetsign FR32:$src)), 2808 (SUBREG_TO_REG (i64 0), 2809 (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>, 2810 Requires<[UseSSE1]>; 2811def : Pat<(i32 (X86fgetsign FR64:$src)), 2812 (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>, 2813 Requires<[UseSSE2]>; 2814def : Pat<(i64 (X86fgetsign FR64:$src)), 2815 (SUBREG_TO_REG (i64 0), 2816 (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>, 2817 Requires<[UseSSE2]>; 2818 2819//===---------------------------------------------------------------------===// 2820// SSE2 - Packed Integer Logical Instructions 2821//===---------------------------------------------------------------------===// 2822 2823let ExeDomain = SSEPackedInt in { // SSE integer instructions 2824 2825/// PDI_binop_rm - Simple SSE2 binary operator. 2826multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 2827 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 2828 X86MemOperand x86memop, OpndItins itins, 2829 bit IsCommutable, bit Is2Addr> { 2830 let isCommutable = IsCommutable in 2831 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 2832 (ins RC:$src1, RC:$src2), 2833 !if(Is2Addr, 2834 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2835 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2836 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, 2837 Sched<[itins.Sched]>; 2838 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 2839 (ins RC:$src1, x86memop:$src2), 2840 !if(Is2Addr, 2841 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2842 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2843 [(set RC:$dst, (OpVT (OpNode RC:$src1, 2844 (bitconvert (memop_frag addr:$src2)))))], 2845 itins.rm>, 2846 Sched<[itins.Sched.Folded, ReadAfterLd]>; 2847} 2848} // ExeDomain = SSEPackedInt 2849 2850multiclass PDI_binop_all<bits<8> 
opc, string OpcodeStr, SDNode Opcode, 2851 ValueType OpVT128, ValueType OpVT256, 2852 OpndItins itins, bit IsCommutable = 0> { 2853let Predicates = [HasAVX] in 2854 defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128, 2855 VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V; 2856 2857let Constraints = "$src1 = $dst" in 2858 defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128, 2859 memopv2i64, i128mem, itins, IsCommutable, 1>; 2860 2861let Predicates = [HasAVX2] in 2862 defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, 2863 OpVT256, VR256, loadv4i64, i256mem, itins, 2864 IsCommutable, 0>, VEX_4V, VEX_L; 2865} 2866 2867// These are ordered here for pattern ordering requirements with the fp versions 2868 2869defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, 2870 SSE_VEC_BIT_ITINS_P, 1>; 2871defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, 2872 SSE_VEC_BIT_ITINS_P, 1>; 2873defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, 2874 SSE_VEC_BIT_ITINS_P, 1>; 2875defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, 2876 SSE_VEC_BIT_ITINS_P, 0>; 2877 2878//===----------------------------------------------------------------------===// 2879// SSE 1 & 2 - Logical Instructions 2880//===----------------------------------------------------------------------===// 2881 2882/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops 2883/// 2884multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, 2885 SDNode OpNode, OpndItins itins> { 2886 defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, 2887 FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>, 2888 PS, VEX_4V; 2889 2890 defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, 2891 FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>, 2892 PD, VEX_4V; 2893 2894 let Constraints = "$src1 = $dst" in { 2895 defm PS : sse12_fp_packed<opc, 
!strconcat(OpcodeStr, "ps"), OpNode, FR32, 2896 f32, f128mem, memopfsf32, SSEPackedSingle, itins>, 2897 PS; 2898 2899 defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64, 2900 f64, f128mem, memopfsf64, SSEPackedDouble, itins>, 2901 PD; 2902 } 2903} 2904 2905// Alias bitwise logical operations using SSE logical ops on packed FP values. 2906let isCodeGenOnly = 1 in { 2907 defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, 2908 SSE_BIT_ITINS_P>; 2909 defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, 2910 SSE_BIT_ITINS_P>; 2911 defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, 2912 SSE_BIT_ITINS_P>; 2913 2914 let isCommutable = 0 in 2915 defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn, 2916 SSE_BIT_ITINS_P>; 2917} 2918 2919/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops 2920/// 2921multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, 2922 SDNode OpNode> { 2923 defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, 2924 !strconcat(OpcodeStr, "ps"), f256mem, 2925 [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], 2926 [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), 2927 (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L; 2928 2929 defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, 2930 !strconcat(OpcodeStr, "pd"), f256mem, 2931 [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), 2932 (bc_v4i64 (v4f64 VR256:$src2))))], 2933 [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), 2934 (loadv4i64 addr:$src2)))], 0>, 2935 PD, VEX_4V, VEX_L; 2936 2937 // In AVX no need to add a pattern for 128-bit logical rr ps, because they 2938 // are all promoted to v2i64, and the patterns are covered by the int 2939 // version. This is needed in SSE only, because v2i64 isn't supported on 2940 // SSE1, but only on SSE2. 
2941 defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2942 !strconcat(OpcodeStr, "ps"), f128mem, [], 2943 [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), 2944 (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V; 2945 2946 defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2947 !strconcat(OpcodeStr, "pd"), f128mem, 2948 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2949 (bc_v2i64 (v2f64 VR128:$src2))))], 2950 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2951 (loadv2i64 addr:$src2)))], 0>, 2952 PD, VEX_4V; 2953 2954 let Constraints = "$src1 = $dst" in { 2955 defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2956 !strconcat(OpcodeStr, "ps"), f128mem, 2957 [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], 2958 [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), 2959 (memopv2i64 addr:$src2)))]>, PS; 2960 2961 defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2962 !strconcat(OpcodeStr, "pd"), f128mem, 2963 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2964 (bc_v2i64 (v2f64 VR128:$src2))))], 2965 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2966 (memopv2i64 addr:$src2)))]>, PD; 2967 } 2968} 2969 2970defm AND : sse12_fp_packed_logical<0x54, "and", and>; 2971defm OR : sse12_fp_packed_logical<0x56, "or", or>; 2972defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; 2973let isCommutable = 0 in 2974 defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; 2975 2976//===----------------------------------------------------------------------===// 2977// SSE 1 & 2 - Arithmetic Instructions 2978//===----------------------------------------------------------------------===// 2979 2980/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and 2981/// vector forms. 2982/// 2983/// In addition, we also have a special variant of the scalar form here to 2984/// represent the associated intrinsic operation. 
/// This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be commuted).
///
/// These three forms can each be reg+reg or reg+mem.
///

/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SizeItins itins> {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                               VR128, v4f32, f128mem, loadv4f32,
                               SSEPackedSingle, itins.s, 0>, PS, VEX_4V;
  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                               VR128, v2f64, f128mem, loadv2f64,
                               SSEPackedDouble, itins.d, 0>, PD, VEX_4V;

  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
                        OpNode, VR256, v8f32, f256mem, loadv8f32,
                        SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L;
  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
                        OpNode, VR256, v4f64, f256mem, loadv4f64,
                        SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L;

  // Non-VEX packed forms are two-operand read-modify-write.
  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
                              itins.s>, PS;
    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
                              itins.d>, PD;
  }
}

multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  SizeItins itins> {
  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                         OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;
  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                         OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                              OpNode, FR32, f32mem, itins.s>, XS;
    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                              OpNode, FR64, f64mem, itins.d>, XD;
  }
}

multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SizeItins itins> {
  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
                   itins.s, 0>, XS, VEX_4V, VEX_LIG;
  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
                   itins.d, 0>, XD, VEX_4V, VEX_LIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
                   itins.s>, XS;
    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
                   itins.d>, XD;
  }
}

// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
           basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
           basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
           basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
           basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}

// Commutative variants of max/min for use when the compiler knows NaN/zero
// ordering does not matter; selection-only (no assembly mnemonic of their own).
let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// a scalar fp operation followed by a blend.
//
// These patterns know, for example, how to select an ADDSS from a
// float add plus vector insert.
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// we now generate:
//   addss %xmm1, %xmm0

let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
                      (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
            (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
}

let Predicates = [UseSSE2] in {
  // SSE2 patterns to select scalar double-precision fp arithmetic instructions

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}

let Predicates = [UseSSE41] in {
  // If the subtarget has SSE4.1 but not AVX, the vector insert
  // instruction is lowered into a X86insrtps rather than a X86Movss.
  // When selecting SSE scalar single-precision fp arithmetic instructions,
  // make sure that we correctly match the X86insrtps.

  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
}

let Predicates = [HasAVX] in {
  // The following patterns select AVX Scalar single/double precision fp
  // arithmetic instructions.

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
                      (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
            (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                  (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                        FR32:$src))), (iPTR 0))),
            (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// a vector packed single/double fp operation followed by a vector insert.
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {c[0], a[1], a[2], a[3]};
//   }
//
// previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// we now generate:
//   addss %xmm1, %xmm0

let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
}

let Predicates = [UseSSE2] in {
  // SSE2 patterns to select scalar double-precision fp arithmetic instructions
  // from a packed double-precision fp instruction plus movsd.

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}

let Predicates = [HasAVX] in {
  // The following patterns select AVX Scalar single/double precision fp
  // arithmetic instructions from a packed single precision fp instruction
  // plus movss/movsd.

  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                   (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                   (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

let Sched = WriteFSqrt in {
def SSE_SQRTPS : OpndItins<
  IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM
>;

def SSE_SQRTSS : OpndItins<
  IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM
>;

def SSE_SQRTPD : OpndItins<
  IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM
>;

def SSE_SQRTSD : OpndItins<
  IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM
>;
}

let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
  IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;

def SSE_RCPS : OpndItins<
  IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
}

/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
let Predicates = [HasAVX], hasSideEffects = 0 in {
  def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
                      (ins FR32:$src1, FR32:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1,f32mem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  let isCodeGenOnly = 1 in
  def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
  // For scalar unary operations, fold a load into the operation
  // only in OptForSize mode. It eliminates an instruction, but it also
  // eliminates a whole-register clobber (the load), so it introduces a
  // partial register update condition.
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
              !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
              [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
let isCodeGenOnly = 1 in {
  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
                Sched<[itins.Sched]>;
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
                Sched<[itins.Sched.Folded]>;
}
}

/// sse1_fp_unop_rw - SSE1 unops where the vector (intrinsic) form has a
/// read-write operand.
multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins> {
let Predicates = [HasAVX], hasSideEffects = 0 in {
  def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
                      (ins FR32:$src1, FR32:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1,f32mem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  let isCodeGenOnly = 1 in
  def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
  // For scalar unary operations, fold a load into the operation
  // only in OptForSize mode. It eliminates an instruction, but it also
  // eliminates a whole-register clobber (the load), so it introduces a
  // partial register update condition.
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
              !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
              [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
    def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [], itins.rr>, Sched<[itins.Sched]>;
    let mayLoad = 1, hasSideEffects = 0 in
    def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins> {
let Predicates = [HasAVX] in {
  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
                       itins.rr>, VEX, Sched<[itins.Sched]>;
  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}

  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
            Sched<[itins.Sched]>;
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
            Sched<[itins.Sched.Folded]>;
}

/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
                              Intrinsic V4F32Int, Intrinsic V8F32Int,
                              OpndItins itins> {
let isCodeGenOnly = 1 in {
let Predicates = [HasAVX] in {
  def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                           !strconcat("v", OpcodeStr,
                                      "ps\t{$src, $dst|$dst, $src}"),
                           [(set VR128:$dst, (V4F32Int VR128:$src))],
                           itins.rr>, VEX, Sched<[itins.Sched]>;
  def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                           !strconcat("v", OpcodeStr,
                                      "ps\t{$src, $dst|$dst, $src}"),
                           [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))],
                           itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
  def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                            !strconcat("v", OpcodeStr,
                                       "ps\t{$src, $dst|$dst, $src}"),
                            [(set VR256:$dst, (V8F32Int VR256:$src))],
                            itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
  def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
                            (ins f256mem:$src),
                            !strconcat("v", OpcodeStr,
                                       "ps\t{$src, $dst|$dst, $src}"),
                            [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))],
                            itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}

  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))],
                    itins.rr>, Sched<[itins.Sched]>;
  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
                    itins.rm>, Sched<[itins.Sched.Folded]>;
} // isCodeGenOnly = 1
}

/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
let Predicates = [HasAVX], hasSideEffects = 0 in {
  def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
                       (ins FR64:$src1, FR64:$src2),
                       !strconcat("v", OpcodeStr,
                                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
                       (ins FR64:$src1,f64mem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>, VEX_4V, VEX_LIG,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  let isCodeGenOnly = 1 in
  def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
                       (ins VR128:$src1, sdmem:$src2),
                       !strconcat("v", OpcodeStr,
                                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>, VEX_4V, VEX_LIG,
                       Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
            Sched<[itins.Sched]>;
  // See the comments in sse1_fp_unop_s for why this is OptForSize.
  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
              !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
              [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
            Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
let isCodeGenOnly = 1 in {
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
                Sched<[itins.Sched]>;
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
                Sched<[itins.Sched.Folded]>;
}
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, OpndItins itins> {
let Predicates = [HasAVX] in {
  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
                       itins.rr>, VEX, Sched<[itins.Sched]>;
  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}

  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
            Sched<[itins.Sched]>;
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
            Sched<[itins.Sched.Folded]>;
}

// Square root.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
                            SSE_SQRTSS>,
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
             sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
                            SSE_SQRTSD>,
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>,
             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
                                int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>;
defm RCP   : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
                                int_x86_avx_rcp_ps_256, SSE_RCPP>;

let Predicates = [UseAVX] in {
  def : Pat<(f32 (fsqrt FR32:$src)),
            (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;
  def : Pat<(f64 (fsqrt FR64:$src)),
            (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;

  def : Pat<(f32 (X86frsqrt FR32:$src)),
            (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;

  def : Pat<(f32 (X86frcp FR32:$src)),
            (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;
}
let Predicates = [UseAVX] in {
  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR64)),
                              VR128)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
            (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
                                       (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
            (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
let Predicates = [UseSSE1] in {
  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (RSQRTSSr_Int VR128:$src, VR128:$src)>;
  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (RCPSSr_Int VR128:$src, VR128:$src)>;
}

// There is no f64 version of the reciprocal approximation instructions.

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteStore] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntps\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v4f32 VR128:$src),
                                                addr:$dst)],
                      IIC_SSE_MOVNT>, VEX;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntpd\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v2f64 VR128:$src),
                                                addr:$dst)],
                      IIC_SSE_MOVNT>, VEX;

let ExeDomain = SSEPackedInt in
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntdq\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v2i64 VR128:$src),
                                                addr:$dst)],
                      IIC_SSE_MOVNT>, VEX;

def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntps\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v8f32 VR256:$src),
                                                 addr:$dst)],
                       IIC_SSE_MOVNT>, VEX, VEX_L;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntpd\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4f64 VR256:$src),
                                                 addr:$dst)],
                       IIC_SSE_MOVNT>, VEX, VEX_L;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntdq\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4i64 VR256:$src),
                                                 addr:$dst)],
                       IIC_SSE_MOVNT>, VEX, VEX_L;

def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
                    IIC_SSE_MOVNT>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)],
                    IIC_SSE_MOVNT>;

let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
                    IIC_SSE_MOVNT>;

// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "movnti{l}\t{$src, $dst|$dst, $src}",
                 [(nontemporalstore (i32 GR32:$src), addr:$dst)],
                 IIC_SSE_MOVNT>,
               PS, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                     "movnti{q}\t{$src, $dst|$dst, $src}",
                     [(nontemporalstore (i64 GR64:$src), addr:$dst)],
                     IIC_SSE_MOVNT>,
                  PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
}

// FIXME: How should flush instruction be modeled?
let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
               IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;
}

let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
3778def PAUSE : I<0x90, RawFrm, (outs), (ins), 3779 "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, 3780 OBXS, Requires<[HasSSE2]>; 3781} 3782 3783let SchedRW = [WriteFence] in { 3784// Load, store, and memory fence 3785def SFENCE : I<0xAE, MRM_F8, (outs), (ins), 3786 "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>, 3787 TB, Requires<[HasSSE1]>; 3788def LFENCE : I<0xAE, MRM_E8, (outs), (ins), 3789 "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>, 3790 TB, Requires<[HasSSE2]>; 3791def MFENCE : I<0xAE, MRM_F0, (outs), (ins), 3792 "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, 3793 TB, Requires<[HasSSE2]>; 3794} // SchedRW 3795 3796def : Pat<(X86SFence), (SFENCE)>; 3797def : Pat<(X86LFence), (LFENCE)>; 3798def : Pat<(X86MFence), (MFENCE)>; 3799 3800//===----------------------------------------------------------------------===// 3801// SSE 1 & 2 - Load/Store XCSR register 3802//===----------------------------------------------------------------------===// 3803 3804def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), 3805 "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], 3806 IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>; 3807def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), 3808 "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], 3809 IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>; 3810 3811def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), 3812 "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], 3813 IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>; 3814def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), 3815 "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], 3816 IIC_SSE_STMXCSR>, Sched<[WriteStore]>; 3817 3818//===---------------------------------------------------------------------===// 3819// SSE2 - Move Aligned/Unaligned Packed Integer Instructions 3820//===---------------------------------------------------------------------===// 3821 3822let ExeDomain = SSEPackedInt in { // SSE integer instructions 3823 3824let neverHasSideEffects = 1, 
SchedRW = [WriteMove] in { 3825def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3826 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, 3827 VEX; 3828def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 3829 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, 3830 VEX, VEX_L; 3831def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3832 "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, 3833 VEX; 3834def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 3835 "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, 3836 VEX, VEX_L; 3837} 3838 3839// For Disassembler 3840let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 3841 SchedRW = [WriteMove] in { 3842def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3843 "movdqa\t{$src, $dst|$dst, $src}", [], 3844 IIC_SSE_MOVA_P_RR>, 3845 VEX; 3846def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), 3847 "movdqa\t{$src, $dst|$dst, $src}", [], 3848 IIC_SSE_MOVA_P_RR>, VEX, VEX_L; 3849def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3850 "movdqu\t{$src, $dst|$dst, $src}", [], 3851 IIC_SSE_MOVU_P_RR>, 3852 VEX; 3853def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), 3854 "movdqu\t{$src, $dst|$dst, $src}", [], 3855 IIC_SSE_MOVU_P_RR>, VEX, VEX_L; 3856} 3857 3858let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, 3859 neverHasSideEffects = 1, SchedRW = [WriteLoad] in { 3860def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3861 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, 3862 VEX; 3863def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3864 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, 3865 VEX, VEX_L; 3866let Predicates = [HasAVX] in { 3867 def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins 
i128mem:$src), 3868 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, 3869 XS, VEX; 3870 def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3871 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, 3872 XS, VEX, VEX_L; 3873} 3874} 3875 3876let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { 3877def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), 3878 (ins i128mem:$dst, VR128:$src), 3879 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, 3880 VEX; 3881def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), 3882 (ins i256mem:$dst, VR256:$src), 3883 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, 3884 VEX, VEX_L; 3885let Predicates = [HasAVX] in { 3886def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3887 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, 3888 XS, VEX; 3889def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), 3890 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, 3891 XS, VEX, VEX_L; 3892} 3893} 3894 3895let SchedRW = [WriteMove] in { 3896let neverHasSideEffects = 1 in 3897def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3898 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; 3899 3900def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3901 "movdqu\t{$src, $dst|$dst, $src}", 3902 [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; 3903 3904// For Disassembler 3905let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { 3906def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3907 "movdqa\t{$src, $dst|$dst, $src}", [], 3908 IIC_SSE_MOVA_P_RR>; 3909 3910def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3911 "movdqu\t{$src, $dst|$dst, $src}", 3912 [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; 3913} 3914} // SchedRW 3915 3916let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, 3917 neverHasSideEffects = 1, 
SchedRW = [WriteLoad] in { 3918def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3919 "movdqa\t{$src, $dst|$dst, $src}", 3920 [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], 3921 IIC_SSE_MOVA_P_RM>; 3922def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3923 "movdqu\t{$src, $dst|$dst, $src}", 3924 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/], 3925 IIC_SSE_MOVU_P_RM>, 3926 XS, Requires<[UseSSE2]>; 3927} 3928 3929let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { 3930def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3931 "movdqa\t{$src, $dst|$dst, $src}", 3932 [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], 3933 IIC_SSE_MOVA_P_MR>; 3934def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3935 "movdqu\t{$src, $dst|$dst, $src}", 3936 [/*(store (v2i64 VR128:$src), addr:$dst)*/], 3937 IIC_SSE_MOVU_P_MR>, 3938 XS, Requires<[UseSSE2]>; 3939} 3940 3941} // ExeDomain = SSEPackedInt 3942 3943let Predicates = [HasAVX] in { 3944 def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), 3945 (VMOVDQUmr addr:$dst, VR128:$src)>; 3946 def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), 3947 (VMOVDQUYmr addr:$dst, VR256:$src)>; 3948} 3949let Predicates = [UseSSE2] in 3950def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), 3951 (MOVDQUmr addr:$dst, VR128:$src)>; 3952 3953//===---------------------------------------------------------------------===// 3954// SSE2 - Packed Integer Arithmetic Instructions 3955//===---------------------------------------------------------------------===// 3956 3957let Sched = WriteVecIMul in 3958def SSE_PMADD : OpndItins< 3959 IIC_SSE_PMADD, IIC_SSE_PMADD 3960>; 3961 3962let ExeDomain = SSEPackedInt in { // SSE integer instructions 3963 3964multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, 3965 RegisterClass RC, PatFrag memop_frag, 3966 X86MemOperand x86memop, 3967 OpndItins itins, 3968 bit 
IsCommutable = 0, 3969 bit Is2Addr = 1> { 3970 let isCommutable = IsCommutable in 3971 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3972 (ins RC:$src1, RC:$src2), 3973 !if(Is2Addr, 3974 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3975 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3976 [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, 3977 Sched<[itins.Sched]>; 3978 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3979 (ins RC:$src1, x86memop:$src2), 3980 !if(Is2Addr, 3981 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3982 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3983 [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], 3984 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 3985} 3986 3987multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, 3988 Intrinsic IntId256, OpndItins itins, 3989 bit IsCommutable = 0> { 3990let Predicates = [HasAVX] in 3991 defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128, 3992 VR128, loadv2i64, i128mem, itins, 3993 IsCommutable, 0>, VEX_4V; 3994 3995let Constraints = "$src1 = $dst" in 3996 defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64, 3997 i128mem, itins, IsCommutable, 1>; 3998 3999let Predicates = [HasAVX2] in 4000 defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256, 4001 VR256, loadv4i64, i256mem, itins, 4002 IsCommutable, 0>, VEX_4V, VEX_L; 4003} 4004 4005multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, 4006 string OpcodeStr, SDNode OpNode, 4007 SDNode OpNode2, RegisterClass RC, 4008 ValueType DstVT, ValueType SrcVT, PatFrag bc_frag, 4009 ShiftOpndItins itins, 4010 bit Is2Addr = 1> { 4011 // src2 is always 128-bit 4012 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 4013 (ins RC:$src1, VR128:$src2), 4014 !if(Is2Addr, 4015 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4016 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, 
$src1, $src2}")), 4017 [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], 4018 itins.rr>, Sched<[WriteVecShift]>; 4019 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 4020 (ins RC:$src1, i128mem:$src2), 4021 !if(Is2Addr, 4022 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4023 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4024 [(set RC:$dst, (DstVT (OpNode RC:$src1, 4025 (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, 4026 Sched<[WriteVecShiftLd, ReadAfterLd]>; 4027 def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), 4028 (ins RC:$src1, i8imm:$src2), 4029 !if(Is2Addr, 4030 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4031 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4032 [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))], itins.ri>, 4033 Sched<[WriteVecShift]>; 4034} 4035 4036/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types 4037multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, 4038 ValueType DstVT, ValueType SrcVT, RegisterClass RC, 4039 PatFrag memop_frag, X86MemOperand x86memop, 4040 OpndItins itins, 4041 bit IsCommutable = 0, bit Is2Addr = 1> { 4042 let isCommutable = IsCommutable in 4043 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 4044 (ins RC:$src1, RC:$src2), 4045 !if(Is2Addr, 4046 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4047 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4048 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, 4049 Sched<[itins.Sched]>; 4050 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 4051 (ins RC:$src1, x86memop:$src2), 4052 !if(Is2Addr, 4053 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4054 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4055 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), 4056 (bitconvert (memop_frag addr:$src2)))))]>, 4057 Sched<[itins.Sched.Folded, ReadAfterLd]>; 4058} 4059} // ExeDomain = SSEPackedInt 4060 
4061defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, 4062 SSE_INTALU_ITINS_P, 1>; 4063defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, 4064 SSE_INTALU_ITINS_P, 1>; 4065defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, 4066 SSE_INTALU_ITINS_P, 1>; 4067defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, 4068 SSE_INTALUQ_ITINS_P, 1>; 4069defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, 4070 SSE_INTMUL_ITINS_P, 1>; 4071defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, 4072 SSE_INTALU_ITINS_P, 0>; 4073defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, 4074 SSE_INTALU_ITINS_P, 0>; 4075defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, 4076 SSE_INTALU_ITINS_P, 0>; 4077defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, 4078 SSE_INTALUQ_ITINS_P, 0>; 4079defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, 4080 SSE_INTALU_ITINS_P, 0>; 4081defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, 4082 SSE_INTALU_ITINS_P, 0>; 4083defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, 4084 SSE_INTALU_ITINS_P, 1>; 4085defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, 4086 SSE_INTALU_ITINS_P, 1>; 4087defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, 4088 SSE_INTALU_ITINS_P, 1>; 4089defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, 4090 SSE_INTALU_ITINS_P, 1>; 4091 4092// Intrinsic forms 4093defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, 4094 int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>; 4095defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, 4096 int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>; 4097defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 4098 int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>; 4099defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w, 4100 int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>; 
4101defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 4102 int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>; 4103defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 4104 int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; 4105defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 4106 int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>; 4107defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 4108 int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>; 4109defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 4110 int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; 4111defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 4112 int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; 4113defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 4114 int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; 4115defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 4116 int_x86_avx2_psad_bw, SSE_PMADD, 1>; 4117 4118let Predicates = [HasAVX] in 4119defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, 4120 loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, 4121 VEX_4V; 4122let Predicates = [HasAVX2] in 4123defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, 4124 VR256, loadv4i64, i256mem, 4125 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; 4126let Constraints = "$src1 = $dst" in 4127defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, 4128 memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; 4129 4130//===---------------------------------------------------------------------===// 4131// SSE2 - Packed Integer Logical Instructions 4132//===---------------------------------------------------------------------===// 4133 4134let Predicates = [HasAVX] in { 4135defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, 4136 VR128, v8i16, v8i16, bc_v8i16, 4137 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4138defm VPSLLD : PDI_binop_rmi<0xF2, 
0x72, MRM6r, "vpslld", X86vshl, X86vshli, 4139 VR128, v4i32, v4i32, bc_v4i32, 4140 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4141defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, 4142 VR128, v2i64, v2i64, bc_v2i64, 4143 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4144 4145defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, 4146 VR128, v8i16, v8i16, bc_v8i16, 4147 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4148defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, 4149 VR128, v4i32, v4i32, bc_v4i32, 4150 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4151defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, 4152 VR128, v2i64, v2i64, bc_v2i64, 4153 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4154 4155defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, 4156 VR128, v8i16, v8i16, bc_v8i16, 4157 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4158defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, 4159 VR128, v4i32, v4i32, bc_v4i32, 4160 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4161 4162let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { 4163 // 128-bit logical shifts. 4164 def VPSLLDQri : PDIi8<0x73, MRM7r, 4165 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4166 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4167 [(set VR128:$dst, 4168 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>, 4169 VEX_4V; 4170 def VPSRLDQri : PDIi8<0x73, MRM3r, 4171 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4172 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4173 [(set VR128:$dst, 4174 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>, 4175 VEX_4V; 4176 // PSRADQri doesn't exist in SSE[1-3]. 
4177} 4178} // Predicates = [HasAVX] 4179 4180let Predicates = [HasAVX2] in { 4181defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, 4182 VR256, v16i16, v8i16, bc_v8i16, 4183 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4184defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, 4185 VR256, v8i32, v4i32, bc_v4i32, 4186 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4187defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, 4188 VR256, v4i64, v2i64, bc_v2i64, 4189 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4190 4191defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, 4192 VR256, v16i16, v8i16, bc_v8i16, 4193 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4194defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, 4195 VR256, v8i32, v4i32, bc_v4i32, 4196 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4197defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, 4198 VR256, v4i64, v2i64, bc_v2i64, 4199 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4200 4201defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, 4202 VR256, v16i16, v8i16, bc_v8i16, 4203 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4204defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, 4205 VR256, v8i32, v4i32, bc_v4i32, 4206 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4207 4208let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { 4209 // 256-bit logical shifts. 
4210 def VPSLLDQYri : PDIi8<0x73, MRM7r, 4211 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), 4212 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4213 [(set VR256:$dst, 4214 (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>, 4215 VEX_4V, VEX_L; 4216 def VPSRLDQYri : PDIi8<0x73, MRM3r, 4217 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), 4218 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4219 [(set VR256:$dst, 4220 (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>, 4221 VEX_4V, VEX_L; 4222 // PSRADQYri doesn't exist in SSE[1-3]. 4223} 4224} // Predicates = [HasAVX2] 4225 4226let Constraints = "$src1 = $dst" in { 4227defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 4228 VR128, v8i16, v8i16, bc_v8i16, 4229 SSE_INTSHIFT_ITINS_P>; 4230defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 4231 VR128, v4i32, v4i32, bc_v4i32, 4232 SSE_INTSHIFT_ITINS_P>; 4233defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 4234 VR128, v2i64, v2i64, bc_v2i64, 4235 SSE_INTSHIFT_ITINS_P>; 4236 4237defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 4238 VR128, v8i16, v8i16, bc_v8i16, 4239 SSE_INTSHIFT_ITINS_P>; 4240defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 4241 VR128, v4i32, v4i32, bc_v4i32, 4242 SSE_INTSHIFT_ITINS_P>; 4243defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 4244 VR128, v2i64, v2i64, bc_v2i64, 4245 SSE_INTSHIFT_ITINS_P>; 4246 4247defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 4248 VR128, v8i16, v8i16, bc_v8i16, 4249 SSE_INTSHIFT_ITINS_P>; 4250defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 4251 VR128, v4i32, v4i32, bc_v4i32, 4252 SSE_INTSHIFT_ITINS_P>; 4253 4254let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { 4255 // 128-bit logical shifts. 
4256 def PSLLDQri : PDIi8<0x73, MRM7r, 4257 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4258 "pslldq\t{$src2, $dst|$dst, $src2}", 4259 [(set VR128:$dst, 4260 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))], 4261 IIC_SSE_INTSHDQ_P_RI>; 4262 def PSRLDQri : PDIi8<0x73, MRM3r, 4263 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4264 "psrldq\t{$src2, $dst|$dst, $src2}", 4265 [(set VR128:$dst, 4266 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))], 4267 IIC_SSE_INTSHDQ_P_RI>; 4268 // PSRADQri doesn't exist in SSE[1-3]. 4269} 4270} // Constraints = "$src1 = $dst" 4271 4272let Predicates = [HasAVX] in { 4273 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), 4274 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4275 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), 4276 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4277 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4278 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4279 4280 // Shift up / down and insert zero's. 4281 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4282 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4283 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4284 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4285} 4286 4287let Predicates = [HasAVX2] in { 4288 def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), 4289 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4290 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), 4291 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4292} 4293 4294let Predicates = [UseSSE2] in { 4295 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), 4296 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4297 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), 4298 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4299 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4300 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4301 4302 // Shift up / down and insert zero's. 
4303 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4304 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4305 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4306 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4307} 4308 4309//===---------------------------------------------------------------------===// 4310// SSE2 - Packed Integer Comparison Instructions 4311//===---------------------------------------------------------------------===// 4312 4313defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, 4314 SSE_INTALU_ITINS_P, 1>; 4315defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 4316 SSE_INTALU_ITINS_P, 1>; 4317defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 4318 SSE_INTALU_ITINS_P, 1>; 4319defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 4320 SSE_INTALU_ITINS_P, 0>; 4321defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 4322 SSE_INTALU_ITINS_P, 0>; 4323defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 4324 SSE_INTALU_ITINS_P, 0>; 4325 4326//===---------------------------------------------------------------------===// 4327// SSE2 - Packed Integer Pack Instructions 4328//===---------------------------------------------------------------------===// 4329 4330defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, 4331 int_x86_avx2_packsswb, 4332 SSE_INTALU_ITINS_SHUFF_P, 0>; 4333defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, 4334 int_x86_avx2_packssdw, 4335 SSE_INTALU_ITINS_SHUFF_P, 0>; 4336defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, 4337 int_x86_avx2_packuswb, 4338 SSE_INTALU_ITINS_SHUFF_P, 0>; 4339 4340//===---------------------------------------------------------------------===// 4341// SSE2 - Packed Integer Shuffle Instructions 4342//===---------------------------------------------------------------------===// 4343 4344let ExeDomain = 
SSEPackedInt in { 4345multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 4346 SDNode OpNode> { 4347let Predicates = [HasAVX] in { 4348 def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 4349 (ins VR128:$src1, i8imm:$src2), 4350 !strconcat("v", OpcodeStr, 4351 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4352 [(set VR128:$dst, 4353 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4354 IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>; 4355 def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 4356 (ins i128mem:$src1, i8imm:$src2), 4357 !strconcat("v", OpcodeStr, 4358 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4359 [(set VR128:$dst, 4360 (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)), 4361 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, 4362 Sched<[WriteShuffleLd]>; 4363} 4364 4365let Predicates = [HasAVX2] in { 4366 def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 4367 (ins VR256:$src1, i8imm:$src2), 4368 !strconcat("v", OpcodeStr, 4369 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4370 [(set VR256:$dst, 4371 (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], 4372 IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>; 4373 def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 4374 (ins i256mem:$src1, i8imm:$src2), 4375 !strconcat("v", OpcodeStr, 4376 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4377 [(set VR256:$dst, 4378 (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)), 4379 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, 4380 Sched<[WriteShuffleLd]>; 4381} 4382 4383let Predicates = [UseSSE2] in { 4384 def ri : Ii8<0x70, MRMSrcReg, 4385 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), 4386 !strconcat(OpcodeStr, 4387 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4388 [(set VR128:$dst, 4389 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4390 IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>; 4391 def mi : Ii8<0x70, MRMSrcMem, 4392 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), 4393 !strconcat(OpcodeStr, 4394 "\t{$src2, 
$src1, $dst|$dst, $src1, $src2}"), 4395 [(set VR128:$dst, 4396 (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), 4397 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, 4398 Sched<[WriteShuffleLd, ReadAfterLd]>; 4399} 4400} 4401} // ExeDomain = SSEPackedInt 4402 4403defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, PD; 4404defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS; 4405defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD; 4406 4407let Predicates = [HasAVX] in { 4408 def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))), 4409 (VPSHUFDmi addr:$src1, imm:$imm)>; 4410 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4411 (VPSHUFDri VR128:$src1, imm:$imm)>; 4412} 4413 4414let Predicates = [UseSSE2] in { 4415 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), 4416 (PSHUFDmi addr:$src1, imm:$imm)>; 4417 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4418 (PSHUFDri VR128:$src1, imm:$imm)>; 4419} 4420 4421//===---------------------------------------------------------------------===// 4422// SSE2 - Packed Integer Unpack Instructions 4423//===---------------------------------------------------------------------===// 4424 4425let ExeDomain = SSEPackedInt in { 4426multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 4427 SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> { 4428 def rr : PDI<opc, MRMSrcReg, 4429 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 4430 !if(Is2Addr, 4431 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4432 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4433 [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], 4434 IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; 4435 def rm : PDI<opc, MRMSrcMem, 4436 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 4437 !if(Is2Addr, 4438 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4439 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4440 [(set 
VR128:$dst, (OpNode VR128:$src1, 4441 (bc_frag (memopv2i64 4442 addr:$src2))))], 4443 IIC_SSE_UNPCK>, 4444 Sched<[WriteShuffleLd, ReadAfterLd]>; 4445} 4446 4447multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, 4448 SDNode OpNode, PatFrag bc_frag> { 4449 def Yrr : PDI<opc, MRMSrcReg, 4450 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), 4451 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4452 [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, 4453 Sched<[WriteShuffle]>; 4454 def Yrm : PDI<opc, MRMSrcMem, 4455 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), 4456 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4457 [(set VR256:$dst, (OpNode VR256:$src1, 4458 (bc_frag (memopv4i64 addr:$src2))))]>, 4459 Sched<[WriteShuffleLd, ReadAfterLd]>; 4460} 4461 4462let Predicates = [HasAVX] in { 4463 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, 4464 bc_v16i8, 0>, VEX_4V; 4465 defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, 4466 bc_v8i16, 0>, VEX_4V; 4467 defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, 4468 bc_v4i32, 0>, VEX_4V; 4469 defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, 4470 bc_v2i64, 0>, VEX_4V; 4471 4472 defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, 4473 bc_v16i8, 0>, VEX_4V; 4474 defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, 4475 bc_v8i16, 0>, VEX_4V; 4476 defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, 4477 bc_v4i32, 0>, VEX_4V; 4478 defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, 4479 bc_v2i64, 0>, VEX_4V; 4480} 4481 4482let Predicates = [HasAVX2] in { 4483 defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, 4484 bc_v32i8>, VEX_4V, VEX_L; 4485 defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, 4486 bc_v16i16>, VEX_4V, VEX_L; 4487 defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", 
v8i32, X86Unpckl, 4488 bc_v8i32>, VEX_4V, VEX_L; 4489 defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, 4490 bc_v4i64>, VEX_4V, VEX_L; 4491 4492 defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, 4493 bc_v32i8>, VEX_4V, VEX_L; 4494 defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, 4495 bc_v16i16>, VEX_4V, VEX_L; 4496 defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, 4497 bc_v8i32>, VEX_4V, VEX_L; 4498 defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, 4499 bc_v4i64>, VEX_4V, VEX_L; 4500} 4501 4502let Constraints = "$src1 = $dst" in { 4503 defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, 4504 bc_v16i8>; 4505 defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, 4506 bc_v8i16>; 4507 defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, 4508 bc_v4i32>; 4509 defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, 4510 bc_v2i64>; 4511 4512 defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, 4513 bc_v16i8>; 4514 defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, 4515 bc_v8i16>; 4516 defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, 4517 bc_v4i32>; 4518 defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, 4519 bc_v2i64>; 4520} 4521} // ExeDomain = SSEPackedInt 4522 4523//===---------------------------------------------------------------------===// 4524// SSE2 - Packed Integer Extract and Insert 4525//===---------------------------------------------------------------------===// 4526 4527let ExeDomain = SSEPackedInt in { 4528multiclass sse2_pinsrw<bit Is2Addr = 1> { 4529 def rri : Ii8<0xC4, MRMSrcReg, 4530 (outs VR128:$dst), (ins VR128:$src1, 4531 GR32orGR64:$src2, i32i8imm:$src3), 4532 !if(Is2Addr, 4533 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4534 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4535 [(set VR128:$dst, 4536 
(X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))], 4537 IIC_SSE_PINSRW>, Sched<[WriteShuffle]>; 4538 def rmi : Ii8<0xC4, MRMSrcMem, 4539 (outs VR128:$dst), (ins VR128:$src1, 4540 i16mem:$src2, i32i8imm:$src3), 4541 !if(Is2Addr, 4542 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4543 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4544 [(set VR128:$dst, 4545 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 4546 imm:$src3))], IIC_SSE_PINSRW>, 4547 Sched<[WriteShuffleLd, ReadAfterLd]>; 4548} 4549 4550// Extract 4551let Predicates = [HasAVX] in 4552def VPEXTRWri : Ii8<0xC5, MRMSrcReg, 4553 (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), 4554 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4555 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4556 imm:$src2))]>, PD, VEX, 4557 Sched<[WriteShuffle]>; 4558def PEXTRWri : PDIi8<0xC5, MRMSrcReg, 4559 (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), 4560 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4561 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4562 imm:$src2))], IIC_SSE_PEXTRW>, 4563 Sched<[WriteShuffleLd, ReadAfterLd]>; 4564 4565// Insert 4566let Predicates = [HasAVX] in 4567defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V; 4568 4569let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in 4570defm PINSRW : sse2_pinsrw, PD; 4571 4572} // ExeDomain = SSEPackedInt 4573 4574//===---------------------------------------------------------------------===// 4575// SSE2 - Packed Mask Creation 4576//===---------------------------------------------------------------------===// 4577 4578let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { 4579 4580def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 4581 (ins VR128:$src), 4582 "pmovmskb\t{$src, $dst|$dst, $src}", 4583 [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4584 IIC_SSE_MOVMSK>, VEX; 4585 4586let Predicates = [HasAVX2] in { 4587def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, 
(outs GR32orGR64:$dst), 4588 (ins VR256:$src), 4589 "pmovmskb\t{$src, $dst|$dst, $src}", 4590 [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, 4591 VEX, VEX_L; 4592} 4593 4594def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), 4595 "pmovmskb\t{$src, $dst|$dst, $src}", 4596 [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4597 IIC_SSE_MOVMSK>; 4598 4599} // ExeDomain = SSEPackedInt 4600 4601//===---------------------------------------------------------------------===// 4602// SSE2 - Conditional Store 4603//===---------------------------------------------------------------------===// 4604 4605let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { 4606 4607let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in 4608def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 4609 (ins VR128:$src, VR128:$mask), 4610 "maskmovdqu\t{$mask, $src|$src, $mask}", 4611 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4612 IIC_SSE_MASKMOV>, VEX; 4613let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in 4614def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), 4615 (ins VR128:$src, VR128:$mask), 4616 "maskmovdqu\t{$mask, $src|$src, $mask}", 4617 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], 4618 IIC_SSE_MASKMOV>, VEX; 4619 4620let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in 4621def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4622 "maskmovdqu\t{$mask, $src|$src, $mask}", 4623 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4624 IIC_SSE_MASKMOV>; 4625let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in 4626def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4627 "maskmovdqu\t{$mask, $src|$src, $mask}", 4628 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], 4629 IIC_SSE_MASKMOV>; 4630 4631} // ExeDomain = SSEPackedInt 4632 4633//===---------------------------------------------------------------------===// 4634// SSE2 - Move 
Doubleword 4635//===---------------------------------------------------------------------===// 4636 4637//===---------------------------------------------------------------------===// 4638// Move Int Doubleword to Packed Double Int 4639// 4640def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4641 "movd\t{$src, $dst|$dst, $src}", 4642 [(set VR128:$dst, 4643 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, 4644 VEX, Sched<[WriteMove]>; 4645def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4646 "movd\t{$src, $dst|$dst, $src}", 4647 [(set VR128:$dst, 4648 (v4i32 (scalar_to_vector (loadi32 addr:$src))))], 4649 IIC_SSE_MOVDQ>, 4650 VEX, Sched<[WriteLoad]>; 4651def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4652 "movq\t{$src, $dst|$dst, $src}", 4653 [(set VR128:$dst, 4654 (v2i64 (scalar_to_vector GR64:$src)))], 4655 IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; 4656let isCodeGenOnly = 1 in 4657def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), 4658 "movq\t{$src, $dst|$dst, $src}", 4659 [(set FR64:$dst, (bitconvert GR64:$src))], 4660 IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; 4661 4662def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4663 "movd\t{$src, $dst|$dst, $src}", 4664 [(set VR128:$dst, 4665 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, 4666 Sched<[WriteMove]>; 4667def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4668 "movd\t{$src, $dst|$dst, $src}", 4669 [(set VR128:$dst, 4670 (v4i32 (scalar_to_vector (loadi32 addr:$src))))], 4671 IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; 4672def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4673 "mov{d|q}\t{$src, $dst|$dst, $src}", 4674 [(set VR128:$dst, 4675 (v2i64 (scalar_to_vector GR64:$src)))], 4676 IIC_SSE_MOVDQ>, Sched<[WriteMove]>; 4677let isCodeGenOnly = 1 in 4678def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins 
GR64:$src), 4679 "mov{d|q}\t{$src, $dst|$dst, $src}", 4680 [(set FR64:$dst, (bitconvert GR64:$src))], 4681 IIC_SSE_MOVDQ>, Sched<[WriteMove]>; 4682 4683//===---------------------------------------------------------------------===// 4684// Move Int Doubleword to Single Scalar 4685// 4686let isCodeGenOnly = 1 in { 4687 def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4688 "movd\t{$src, $dst|$dst, $src}", 4689 [(set FR32:$dst, (bitconvert GR32:$src))], 4690 IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; 4691 4692 def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4693 "movd\t{$src, $dst|$dst, $src}", 4694 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], 4695 IIC_SSE_MOVDQ>, 4696 VEX, Sched<[WriteLoad]>; 4697 def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4698 "movd\t{$src, $dst|$dst, $src}", 4699 [(set FR32:$dst, (bitconvert GR32:$src))], 4700 IIC_SSE_MOVDQ>, Sched<[WriteMove]>; 4701 4702 def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4703 "movd\t{$src, $dst|$dst, $src}", 4704 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], 4705 IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; 4706} 4707 4708//===---------------------------------------------------------------------===// 4709// Move Packed Doubleword Int to Packed Double Int 4710// 4711def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4712 "movd\t{$src, $dst|$dst, $src}", 4713 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), 4714 (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, 4715 Sched<[WriteMove]>; 4716def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs), 4717 (ins i32mem:$dst, VR128:$src), 4718 "movd\t{$src, $dst|$dst, $src}", 4719 [(store (i32 (vector_extract (v4i32 VR128:$src), 4720 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, 4721 VEX, Sched<[WriteStore]>; 4722def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4723 "movd\t{$src, $dst|$dst, $src}", 4724 [(set GR32:$dst, 
(vector_extract (v4i32 VR128:$src), 4725 (iPTR 0)))], IIC_SSE_MOVD_ToGP>, 4726 Sched<[WriteMove]>; 4727def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), 4728 "movd\t{$src, $dst|$dst, $src}", 4729 [(store (i32 (vector_extract (v4i32 VR128:$src), 4730 (iPTR 0))), addr:$dst)], 4731 IIC_SSE_MOVDQ>, Sched<[WriteStore]>; 4732 4733def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))), 4734 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>; 4735 4736def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))), 4737 (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>; 4738 4739def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))), 4740 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>; 4741 4742def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))), 4743 (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>; 4744 4745//===---------------------------------------------------------------------===// 4746// Move Packed Doubleword Int first element to Doubleword Int 4747// 4748let SchedRW = [WriteMove] in { 4749def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4750 "movq\t{$src, $dst|$dst, $src}", 4751 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4752 (iPTR 0)))], 4753 IIC_SSE_MOVD_ToGP>, 4754 VEX; 4755 4756def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4757 "mov{d|q}\t{$src, $dst|$dst, $src}", 4758 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4759 (iPTR 0)))], 4760 IIC_SSE_MOVD_ToGP>; 4761} //SchedRW 4762 4763//===---------------------------------------------------------------------===// 4764// Bitcast FR64 <-> GR64 4765// 4766let isCodeGenOnly = 1 in { 4767 let Predicates = [UseAVX] in 4768 def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), 4769 "movq\t{$src, $dst|$dst, $src}", 4770 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, 4771 VEX, 
Sched<[WriteLoad]>; 4772 def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4773 "movq\t{$src, $dst|$dst, $src}", 4774 [(set GR64:$dst, (bitconvert FR64:$src))], 4775 IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; 4776 def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), 4777 "movq\t{$src, $dst|$dst, $src}", 4778 [(store (i64 (bitconvert FR64:$src)), addr:$dst)], 4779 IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; 4780 4781 def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), 4782 "movq\t{$src, $dst|$dst, $src}", 4783 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], 4784 IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; 4785 def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4786 "mov{d|q}\t{$src, $dst|$dst, $src}", 4787 [(set GR64:$dst, (bitconvert FR64:$src))], 4788 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; 4789 def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), 4790 "movq\t{$src, $dst|$dst, $src}", 4791 [(store (i64 (bitconvert FR64:$src)), addr:$dst)], 4792 IIC_SSE_MOVDQ>, Sched<[WriteStore]>; 4793} 4794 4795//===---------------------------------------------------------------------===// 4796// Move Scalar Single to Double Int 4797// 4798let isCodeGenOnly = 1 in { 4799 def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4800 "movd\t{$src, $dst|$dst, $src}", 4801 [(set GR32:$dst, (bitconvert FR32:$src))], 4802 IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; 4803 def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), 4804 "movd\t{$src, $dst|$dst, $src}", 4805 [(store (i32 (bitconvert FR32:$src)), addr:$dst)], 4806 IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; 4807 def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4808 "movd\t{$src, $dst|$dst, $src}", 4809 [(set GR32:$dst, (bitconvert FR32:$src))], 4810 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; 4811 def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), 
(ins i32mem:$dst, FR32:$src), 4812 "movd\t{$src, $dst|$dst, $src}", 4813 [(store (i32 (bitconvert FR32:$src)), addr:$dst)], 4814 IIC_SSE_MOVDQ>, Sched<[WriteStore]>; 4815} 4816 4817//===---------------------------------------------------------------------===// 4818// Patterns and instructions to describe movd/movq to XMM register zero-extends 4819// 4820let isCodeGenOnly = 1, SchedRW = [WriteMove] in { 4821let AddedComplexity = 15 in { 4822def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4823 "movq\t{$src, $dst|$dst, $src}", // X86-64 only 4824 [(set VR128:$dst, (v2i64 (X86vzmovl 4825 (v2i64 (scalar_to_vector GR64:$src)))))], 4826 IIC_SSE_MOVDQ>, 4827 VEX, VEX_W; 4828def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4829 "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only 4830 [(set VR128:$dst, (v2i64 (X86vzmovl 4831 (v2i64 (scalar_to_vector GR64:$src)))))], 4832 IIC_SSE_MOVDQ>; 4833} 4834} // isCodeGenOnly, SchedRW 4835 4836let Predicates = [UseAVX] in { 4837 let AddedComplexity = 15 in 4838 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), 4839 (VMOVDI2PDIrr GR32:$src)>; 4840 4841 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 4842 let AddedComplexity = 20 in { 4843 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), 4844 (VMOVDI2PDIrm addr:$src)>; 4845 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), 4846 (VMOVDI2PDIrm addr:$src)>; 4847 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 4848 (VMOVDI2PDIrm addr:$src)>; 4849 } 4850 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 
4851 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, 4852 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), 4853 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>; 4854 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 4855 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), 4856 (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; 4857} 4858 4859let Predicates = [UseSSE2] in { 4860 let AddedComplexity = 15 in 4861 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), 4862 (MOVDI2PDIrr GR32:$src)>; 4863 4864 let AddedComplexity = 20 in { 4865 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), 4866 (MOVDI2PDIrm addr:$src)>; 4867 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), 4868 (MOVDI2PDIrm addr:$src)>; 4869 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 4870 (MOVDI2PDIrm addr:$src)>; 4871 } 4872} 4873 4874// These are the correct encodings of the instructions so that we know how to 4875// read correct assembly, even though we continue to emit the wrong ones for 4876// compatibility with Darwin's buggy assembler. 4877def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4878 (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; 4879def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4880 (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; 4881// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX. 
4882def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 4883 (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>; 4884def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 4885 (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>; 4886 4887//===---------------------------------------------------------------------===// 4888// SSE2 - Move Quadword 4889//===---------------------------------------------------------------------===// 4890 4891//===---------------------------------------------------------------------===// 4892// Move Quadword Int to Packed Quadword Int 4893// 4894 4895let SchedRW = [WriteLoad] in { 4896def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4897 "vmovq\t{$src, $dst|$dst, $src}", 4898 [(set VR128:$dst, 4899 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, 4900 VEX, Requires<[UseAVX]>; 4901def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4902 "movq\t{$src, $dst|$dst, $src}", 4903 [(set VR128:$dst, 4904 (v2i64 (scalar_to_vector (loadi64 addr:$src))))], 4905 IIC_SSE_MOVDQ>, XS, 4906 Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix 4907} // SchedRW 4908 4909//===---------------------------------------------------------------------===// 4910// Move Packed Quadword Int to Quadword Int 4911// 4912let SchedRW = [WriteStore] in { 4913def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4914 "movq\t{$src, $dst|$dst, $src}", 4915 [(store (i64 (vector_extract (v2i64 VR128:$src), 4916 (iPTR 0))), addr:$dst)], 4917 IIC_SSE_MOVDQ>, VEX; 4918def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4919 "movq\t{$src, $dst|$dst, $src}", 4920 [(store (i64 (vector_extract (v2i64 VR128:$src), 4921 (iPTR 0))), addr:$dst)], 4922 IIC_SSE_MOVDQ>; 4923} // SchedRW 4924 4925// For disassembler only 4926let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 4927 SchedRW = [WriteVecLogic] in { 4928def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 
4929 "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX; 4930def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 4931 "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>; 4932} 4933 4934//===---------------------------------------------------------------------===// 4935// Store / copy lower 64-bits of a XMM register. 4936// 4937let Predicates = [UseAVX] in 4938def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), 4939 (VMOVPQI2QImr addr:$dst, VR128:$src)>; 4940let Predicates = [UseSSE2] in 4941def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), 4942 (MOVPQI2QImr addr:$dst, VR128:$src)>; 4943 4944let isCodeGenOnly = 1, AddedComplexity = 20 in { 4945def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4946 "vmovq\t{$src, $dst|$dst, $src}", 4947 [(set VR128:$dst, 4948 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 4949 (loadi64 addr:$src))))))], 4950 IIC_SSE_MOVDQ>, 4951 XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>; 4952 4953def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4954 "movq\t{$src, $dst|$dst, $src}", 4955 [(set VR128:$dst, 4956 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 4957 (loadi64 addr:$src))))))], 4958 IIC_SSE_MOVDQ>, 4959 XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; 4960} 4961 4962let Predicates = [UseAVX], AddedComplexity = 20 in { 4963 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 4964 (VMOVZQI2PQIrm addr:$src)>; 4965 def : Pat<(v2i64 (X86vzload addr:$src)), 4966 (VMOVZQI2PQIrm addr:$src)>; 4967} 4968 4969let Predicates = [UseSSE2], AddedComplexity = 20 in { 4970 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 4971 (MOVZQI2PQIrm addr:$src)>; 4972 def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; 4973} 4974 4975let Predicates = [HasAVX] in { 4976def : Pat<(v4i64 (alignedX86vzload addr:$src)), 4977 (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>; 4978def : Pat<(v4i64 (X86vzload addr:$src)), 4979 
(SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>; 4980} 4981 4982//===---------------------------------------------------------------------===// 4983// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in 4984// IA32 document. movq xmm1, xmm2 does clear the high bits. 4985// 4986let SchedRW = [WriteVecLogic] in { 4987let AddedComplexity = 15 in 4988def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4989 "vmovq\t{$src, $dst|$dst, $src}", 4990 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], 4991 IIC_SSE_MOVQ_RR>, 4992 XS, VEX, Requires<[UseAVX]>; 4993let AddedComplexity = 15 in 4994def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4995 "movq\t{$src, $dst|$dst, $src}", 4996 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], 4997 IIC_SSE_MOVQ_RR>, 4998 XS, Requires<[UseSSE2]>; 4999} // SchedRW 5000 5001let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in { 5002let AddedComplexity = 20 in 5003def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5004 "vmovq\t{$src, $dst|$dst, $src}", 5005 [(set VR128:$dst, (v2i64 (X86vzmovl 5006 (loadv2i64 addr:$src))))], 5007 IIC_SSE_MOVDQ>, 5008 XS, VEX, Requires<[UseAVX]>; 5009let AddedComplexity = 20 in { 5010def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5011 "movq\t{$src, $dst|$dst, $src}", 5012 [(set VR128:$dst, (v2i64 (X86vzmovl 5013 (loadv2i64 addr:$src))))], 5014 IIC_SSE_MOVDQ>, 5015 XS, Requires<[UseSSE2]>; 5016} 5017} // isCodeGenOnly, SchedRW 5018 5019let AddedComplexity = 20 in { 5020 let Predicates = [UseAVX] in { 5021 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 5022 (VMOVZPQILo2PQIrr VR128:$src)>; 5023 } 5024 let Predicates = [UseSSE2] in { 5025 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 5026 (MOVZPQILo2PQIrr VR128:$src)>; 5027 } 5028} 5029 5030//===---------------------------------------------------------------------===// 5031// SSE3 - Replicate 
Single FP - MOVSHDUP and MOVSLDUP 5032//===---------------------------------------------------------------------===// 5033multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, 5034 ValueType vt, RegisterClass RC, PatFrag mem_frag, 5035 X86MemOperand x86memop> { 5036def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 5037 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5038 [(set RC:$dst, (vt (OpNode RC:$src)))], 5039 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>; 5040def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 5041 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5042 [(set RC:$dst, (OpNode (mem_frag addr:$src)))], 5043 IIC_SSE_MOV_LH>, Sched<[WriteLoad]>; 5044} 5045 5046let Predicates = [HasAVX] in { 5047 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 5048 v4f32, VR128, loadv4f32, f128mem>, VEX; 5049 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 5050 v4f32, VR128, loadv4f32, f128mem>, VEX; 5051 defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 5052 v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L; 5053 defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 5054 v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L; 5055} 5056defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, 5057 memopv4f32, f128mem>; 5058defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, 5059 memopv4f32, f128mem>; 5060 5061let Predicates = [HasAVX] in { 5062 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 5063 (VMOVSHDUPrr VR128:$src)>; 5064 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))), 5065 (VMOVSHDUPrm addr:$src)>; 5066 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 5067 (VMOVSLDUPrr VR128:$src)>; 5068 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))), 5069 (VMOVSLDUPrm addr:$src)>; 5070 def : Pat<(v8i32 (X86Movshdup VR256:$src)), 5071 (VMOVSHDUPYrr VR256:$src)>; 5072 def : Pat<(v8i32 
(X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))), 5073 (VMOVSHDUPYrm addr:$src)>; 5074 def : Pat<(v8i32 (X86Movsldup VR256:$src)), 5075 (VMOVSLDUPYrr VR256:$src)>; 5076 def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))), 5077 (VMOVSLDUPYrm addr:$src)>; 5078} 5079 5080let Predicates = [UseSSE3] in { 5081 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 5082 (MOVSHDUPrr VR128:$src)>; 5083 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), 5084 (MOVSHDUPrm addr:$src)>; 5085 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 5086 (MOVSLDUPrr VR128:$src)>; 5087 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), 5088 (MOVSLDUPrm addr:$src)>; 5089} 5090 5091//===---------------------------------------------------------------------===// 5092// SSE3 - Replicate Double FP - MOVDDUP 5093//===---------------------------------------------------------------------===// 5094 5095multiclass sse3_replicate_dfp<string OpcodeStr> { 5096let neverHasSideEffects = 1 in 5097def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5098 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5099 [], IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>; 5100def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 5101 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5102 [(set VR128:$dst, 5103 (v2f64 (X86Movddup 5104 (scalar_to_vector (loadf64 addr:$src)))))], 5105 IIC_SSE_MOV_LH>, Sched<[WriteLoad]>; 5106} 5107 5108// FIXME: Merge with above classe when there're patterns for the ymm version 5109multiclass sse3_replicate_dfp_y<string OpcodeStr> { 5110def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 5111 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5112 [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, 5113 Sched<[WriteFShuffle]>; 5114def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 5115 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5116 [(set VR256:$dst, 5117 (v4f64 (X86Movddup 5118 
(scalar_to_vector (loadf64 addr:$src)))))]>, 5119 Sched<[WriteLoad]>; 5120} 5121 5122let Predicates = [HasAVX] in { 5123 defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; 5124 defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L; 5125} 5126 5127defm MOVDDUP : sse3_replicate_dfp<"movddup">; 5128 5129let Predicates = [HasAVX] in { 5130 def : Pat<(X86Movddup (loadv2f64 addr:$src)), 5131 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 5132 def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))), 5133 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 5134 def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))), 5135 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 5136 def : Pat<(X86Movddup (bc_v2f64 5137 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), 5138 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 5139 5140 // 256-bit version 5141 def : Pat<(X86Movddup (loadv4f64 addr:$src)), 5142 (VMOVDDUPYrm addr:$src)>; 5143 def : Pat<(X86Movddup (loadv4i64 addr:$src)), 5144 (VMOVDDUPYrm addr:$src)>; 5145 def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))), 5146 (VMOVDDUPYrm addr:$src)>; 5147 def : Pat<(X86Movddup (v4i64 VR256:$src)), 5148 (VMOVDDUPYrr VR256:$src)>; 5149} 5150 5151let Predicates = [UseSSE3] in { 5152 def : Pat<(X86Movddup (memopv2f64 addr:$src)), 5153 (MOVDDUPrm addr:$src)>; 5154 def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), 5155 (MOVDDUPrm addr:$src)>; 5156 def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), 5157 (MOVDDUPrm addr:$src)>; 5158 def : Pat<(X86Movddup (bc_v2f64 5159 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), 5160 (MOVDDUPrm addr:$src)>; 5161} 5162 5163//===---------------------------------------------------------------------===// 5164// SSE3 - Move Unaligned Integer 5165//===---------------------------------------------------------------------===// 5166 5167let SchedRW = [WriteLoad] in { 5168let Predicates = [HasAVX] in { 5169 def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5170 
"vlddqu\t{$src, $dst|$dst, $src}", 5171 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; 5172 def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 5173 "vlddqu\t{$src, $dst|$dst, $src}", 5174 [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, 5175 VEX, VEX_L; 5176} 5177def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5178 "lddqu\t{$src, $dst|$dst, $src}", 5179 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], 5180 IIC_SSE_LDDQU>; 5181} 5182 5183//===---------------------------------------------------------------------===// 5184// SSE3 - Arithmetic 5185//===---------------------------------------------------------------------===// 5186 5187multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, 5188 X86MemOperand x86memop, OpndItins itins, 5189 bit Is2Addr = 1> { 5190 def rr : I<0xD0, MRMSrcReg, 5191 (outs RC:$dst), (ins RC:$src1, RC:$src2), 5192 !if(Is2Addr, 5193 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5194 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5195 [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, 5196 Sched<[itins.Sched]>; 5197 def rm : I<0xD0, MRMSrcMem, 5198 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5199 !if(Is2Addr, 5200 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5201 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5202 [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>, 5203 Sched<[itins.Sched.Folded, ReadAfterLd]>; 5204} 5205 5206let Predicates = [HasAVX] in { 5207 let ExeDomain = SSEPackedSingle in { 5208 defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, 5209 f128mem, SSE_ALU_F32P, 0>, XD, VEX_4V; 5210 defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, 5211 f256mem, SSE_ALU_F32P, 0>, XD, VEX_4V, VEX_L; 5212 } 5213 let ExeDomain = SSEPackedDouble in { 5214 defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, 
5215 f128mem, SSE_ALU_F64P, 0>, PD, VEX_4V; 5216 defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, 5217 f256mem, SSE_ALU_F64P, 0>, PD, VEX_4V, VEX_L; 5218 } 5219} 5220let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { 5221 let ExeDomain = SSEPackedSingle in 5222 defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, 5223 f128mem, SSE_ALU_F32P>, XD; 5224 let ExeDomain = SSEPackedDouble in 5225 defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, 5226 f128mem, SSE_ALU_F64P>, PD; 5227} 5228 5229//===---------------------------------------------------------------------===// 5230// SSE3 Instructions 5231//===---------------------------------------------------------------------===// 5232 5233// Horizontal ops 5234multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 5235 X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { 5236 def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 5237 !if(Is2Addr, 5238 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5239 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5240 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, 5241 Sched<[WriteFAdd]>; 5242 5243 def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5244 !if(Is2Addr, 5245 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5246 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5247 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], 5248 IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; 5249} 5250multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 5251 X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { 5252 def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 5253 !if(Is2Addr, 5254 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5255 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, 
$src2}")), 5256 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, 5257 Sched<[WriteFAdd]>; 5258 5259 def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5260 !if(Is2Addr, 5261 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5262 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5263 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], 5264 IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; 5265} 5266 5267let Predicates = [HasAVX] in { 5268 let ExeDomain = SSEPackedSingle in { 5269 defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, 5270 X86fhadd, 0>, VEX_4V; 5271 defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, 5272 X86fhsub, 0>, VEX_4V; 5273 defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, 5274 X86fhadd, 0>, VEX_4V, VEX_L; 5275 defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, 5276 X86fhsub, 0>, VEX_4V, VEX_L; 5277 } 5278 let ExeDomain = SSEPackedDouble in { 5279 defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, 5280 X86fhadd, 0>, VEX_4V; 5281 defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, 5282 X86fhsub, 0>, VEX_4V; 5283 defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, 5284 X86fhadd, 0>, VEX_4V, VEX_L; 5285 defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, 5286 X86fhsub, 0>, VEX_4V, VEX_L; 5287 } 5288} 5289 5290let Constraints = "$src1 = $dst" in { 5291 let ExeDomain = SSEPackedSingle in { 5292 defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; 5293 defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; 5294 } 5295 let ExeDomain = SSEPackedDouble in { 5296 defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; 5297 defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; 5298 } 5299} 5300 5301//===---------------------------------------------------------------------===// 5302// SSSE3 - Packed Absolute 
Instructions 5303//===---------------------------------------------------------------------===// 5304 5305 5306/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 5307multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, 5308 Intrinsic IntId128> { 5309 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 5310 (ins VR128:$src), 5311 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5312 [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, 5313 Sched<[WriteVecALU]>; 5314 5315 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 5316 (ins i128mem:$src), 5317 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5318 [(set VR128:$dst, 5319 (IntId128 5320 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, 5321 Sched<[WriteVecALULd]>; 5322} 5323 5324/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 5325multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, 5326 Intrinsic IntId256> { 5327 def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), 5328 (ins VR256:$src), 5329 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5330 [(set VR256:$dst, (IntId256 VR256:$src))]>, 5331 Sched<[WriteVecALU]>; 5332 5333 def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), 5334 (ins i256mem:$src), 5335 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5336 [(set VR256:$dst, 5337 (IntId256 5338 (bitconvert (memopv4i64 addr:$src))))]>, 5339 Sched<[WriteVecALULd]>; 5340} 5341 5342// Helper fragments to match sext vXi1 to vXiY. 
5343def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), 5344 VR128:$src))>; 5345def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>; 5346def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>; 5347def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), 5348 VR256:$src))>; 5349def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>; 5350def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>; 5351 5352let Predicates = [HasAVX] in { 5353 defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", 5354 int_x86_ssse3_pabs_b_128>, VEX; 5355 defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", 5356 int_x86_ssse3_pabs_w_128>, VEX; 5357 defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", 5358 int_x86_ssse3_pabs_d_128>, VEX; 5359 5360 def : Pat<(xor 5361 (bc_v2i64 (v16i1sextv16i8)), 5362 (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), 5363 (VPABSBrr128 VR128:$src)>; 5364 def : Pat<(xor 5365 (bc_v2i64 (v8i1sextv8i16)), 5366 (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), 5367 (VPABSWrr128 VR128:$src)>; 5368 def : Pat<(xor 5369 (bc_v2i64 (v4i1sextv4i32)), 5370 (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), 5371 (VPABSDrr128 VR128:$src)>; 5372} 5373 5374let Predicates = [HasAVX2] in { 5375 defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", 5376 int_x86_avx2_pabs_b>, VEX, VEX_L; 5377 defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", 5378 int_x86_avx2_pabs_w>, VEX, VEX_L; 5379 defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", 5380 int_x86_avx2_pabs_d>, VEX, VEX_L; 5381 5382 def : Pat<(xor 5383 (bc_v4i64 (v32i1sextv32i8)), 5384 (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), 5385 (VPABSBrr256 VR256:$src)>; 5386 def : Pat<(xor 5387 (bc_v4i64 (v16i1sextv16i16)), 5388 (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), 5389 (VPABSWrr256 VR256:$src)>; 5390 def : Pat<(xor 5391 (bc_v4i64 (v8i1sextv8i32)), 5392 (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), 5393 
(VPABSDrr256 VR256:$src)>; 5394} 5395 5396defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", 5397 int_x86_ssse3_pabs_b_128>; 5398defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", 5399 int_x86_ssse3_pabs_w_128>; 5400defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", 5401 int_x86_ssse3_pabs_d_128>; 5402 5403let Predicates = [HasSSSE3] in { 5404 def : Pat<(xor 5405 (bc_v2i64 (v16i1sextv16i8)), 5406 (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), 5407 (PABSBrr128 VR128:$src)>; 5408 def : Pat<(xor 5409 (bc_v2i64 (v8i1sextv8i16)), 5410 (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), 5411 (PABSWrr128 VR128:$src)>; 5412 def : Pat<(xor 5413 (bc_v2i64 (v4i1sextv4i32)), 5414 (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), 5415 (PABSDrr128 VR128:$src)>; 5416} 5417 5418//===---------------------------------------------------------------------===// 5419// SSSE3 - Packed Binary Operator Instructions 5420//===---------------------------------------------------------------------===// 5421 5422let Sched = WriteVecALU in { 5423def SSE_PHADDSUBD : OpndItins< 5424 IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM 5425>; 5426def SSE_PHADDSUBSW : OpndItins< 5427 IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM 5428>; 5429def SSE_PHADDSUBW : OpndItins< 5430 IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM 5431>; 5432} 5433let Sched = WriteShuffle in 5434def SSE_PSHUFB : OpndItins< 5435 IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM 5436>; 5437let Sched = WriteVecALU in 5438def SSE_PSIGN : OpndItins< 5439 IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM 5440>; 5441let Sched = WriteVecIMul in 5442def SSE_PMULHRSW : OpndItins< 5443 IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW 5444>; 5445 5446/// SS3I_binop_rm - Simple SSSE3 bin op 5447multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5448 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 5449 X86MemOperand x86memop, OpndItins itins, 5450 bit Is2Addr = 1> { 5451 let isCommutable = 1 in 5452 def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst), 5453 (ins RC:$src1, 
RC:$src2), 5454 !if(Is2Addr, 5455 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5456 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5457 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, 5458 Sched<[itins.Sched]>; 5459 def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), 5460 (ins RC:$src1, x86memop:$src2), 5461 !if(Is2Addr, 5462 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5463 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5464 [(set RC:$dst, 5465 (OpVT (OpNode RC:$src1, 5466 (bitconvert (memop_frag addr:$src2)))))], itins.rm>, 5467 Sched<[itins.Sched.Folded, ReadAfterLd]>; 5468} 5469 5470/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 5471multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, 5472 Intrinsic IntId128, OpndItins itins, 5473 bit Is2Addr = 1> { 5474 let isCommutable = 1 in 5475 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 5476 (ins VR128:$src1, VR128:$src2), 5477 !if(Is2Addr, 5478 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5479 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5480 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, 5481 Sched<[itins.Sched]>; 5482 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 5483 (ins VR128:$src1, i128mem:$src2), 5484 !if(Is2Addr, 5485 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5486 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5487 [(set VR128:$dst, 5488 (IntId128 VR128:$src1, 5489 (bitconvert (memopv2i64 addr:$src2))))]>, 5490 Sched<[itins.Sched.Folded, ReadAfterLd]>; 5491} 5492 5493multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, 5494 Intrinsic IntId256, 5495 X86FoldableSchedWrite Sched> { 5496 let isCommutable = 1 in 5497 def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), 5498 (ins VR256:$src1, VR256:$src2), 5499 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5500 [(set VR256:$dst, 
(IntId256 VR256:$src1, VR256:$src2))]>, 5501 Sched<[Sched]>; 5502 def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), 5503 (ins VR256:$src1, i256mem:$src2), 5504 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5505 [(set VR256:$dst, 5506 (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>, 5507 Sched<[Sched.Folded, ReadAfterLd]>; 5508} 5509 5510let ImmT = NoImm, Predicates = [HasAVX] in { 5511let isCommutable = 0 in { 5512 defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128, 5513 loadv2i64, i128mem, 5514 SSE_PHADDSUBW, 0>, VEX_4V; 5515 defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128, 5516 loadv2i64, i128mem, 5517 SSE_PHADDSUBD, 0>, VEX_4V; 5518 defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128, 5519 loadv2i64, i128mem, 5520 SSE_PHADDSUBW, 0>, VEX_4V; 5521 defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128, 5522 loadv2i64, i128mem, 5523 SSE_PHADDSUBD, 0>, VEX_4V; 5524 defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128, 5525 loadv2i64, i128mem, 5526 SSE_PSIGN, 0>, VEX_4V; 5527 defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128, 5528 loadv2i64, i128mem, 5529 SSE_PSIGN, 0>, VEX_4V; 5530 defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128, 5531 loadv2i64, i128mem, 5532 SSE_PSIGN, 0>, VEX_4V; 5533 defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128, 5534 loadv2i64, i128mem, 5535 SSE_PSHUFB, 0>, VEX_4V; 5536 defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", 5537 int_x86_ssse3_phadd_sw_128, 5538 SSE_PHADDSUBSW, 0>, VEX_4V; 5539 defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", 5540 int_x86_ssse3_phsub_sw_128, 5541 SSE_PHADDSUBSW, 0>, VEX_4V; 5542 defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", 5543 int_x86_ssse3_pmadd_ub_sw_128, 5544 SSE_PMADD, 0>, VEX_4V; 5545} 5546defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", 5547 int_x86_ssse3_pmul_hr_sw_128, 5548 SSE_PMULHRSW, 0>, VEX_4V; 5549} 

// 256-bit AVX2 versions of the SSSE3 horizontal/sign/shuffle binops.
// Itineraries mirror the 128-bit defs above: D-suffixed horizontal ops use
// SSE_PHADDSUBD and the PSIGN ops use SSE_PSIGN (several of these were
// copy-pasted with SSE_PHADDSUBW, inconsistent with their 128-bit twins).
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
                                loadv4i64, i256mem,
                                SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                loadv4i64, i256mem,
                                SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                loadv4i64, i256mem,
                                SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                loadv4i64, i256mem,
                                SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                loadv4i64, i256mem,
                                SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                loadv4i64, i256mem,
                                SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                loadv4i64, i256mem,
                                SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                loadv4i64, i256mem,
                                SSE_PSHUFB, 0>, VEX_4V, VEX_L;
  defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                      int_x86_avx2_phadd_sw,
                                      WriteVecALU>, VEX_4V, VEX_L;
  defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                      int_x86_avx2_phsub_sw,
                                      WriteVecALU>, VEX_4V, VEX_L;
  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
                                        int_x86_avx2_pmadd_ub_sw,
                                        WriteVecIMul>, VEX_4V, VEX_L;
}
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
                                     int_x86_avx2_pmul_hr_sw,
                                     WriteVecIMul>, VEX_4V, VEX_L;
}

// None of these have i8 immediate fields.
5593let ImmT = NoImm, Constraints = "$src1 = $dst" in { 5594let isCommutable = 0 in { 5595 defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128, 5596 memopv2i64, i128mem, SSE_PHADDSUBW>; 5597 defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128, 5598 memopv2i64, i128mem, SSE_PHADDSUBD>; 5599 defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128, 5600 memopv2i64, i128mem, SSE_PHADDSUBW>; 5601 defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, 5602 memopv2i64, i128mem, SSE_PHADDSUBD>; 5603 defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128, 5604 memopv2i64, i128mem, SSE_PSIGN>; 5605 defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128, 5606 memopv2i64, i128mem, SSE_PSIGN>; 5607 defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128, 5608 memopv2i64, i128mem, SSE_PSIGN>; 5609 defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128, 5610 memopv2i64, i128mem, SSE_PSHUFB>; 5611 defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", 5612 int_x86_ssse3_phadd_sw_128, 5613 SSE_PHADDSUBSW>; 5614 defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", 5615 int_x86_ssse3_phsub_sw_128, 5616 SSE_PHADDSUBSW>; 5617 defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", 5618 int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>; 5619} 5620defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", 5621 int_x86_ssse3_pmul_hr_sw_128, 5622 SSE_PMULHRSW>; 5623} 5624 5625//===---------------------------------------------------------------------===// 5626// SSSE3 - Packed Align Instruction Patterns 5627//===---------------------------------------------------------------------===// 5628 5629multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { 5630 let neverHasSideEffects = 1 in { 5631 def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), 5632 (ins VR128:$src1, VR128:$src2, i8imm:$src3), 5633 !if(Is2Addr, 5634 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5635 !strconcat(asm, 5636 "\t{$src3, 
$src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5637 [], IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>; 5638 let mayLoad = 1 in 5639 def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), 5640 (ins VR128:$src1, i128mem:$src2, i8imm:$src3), 5641 !if(Is2Addr, 5642 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5643 !strconcat(asm, 5644 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5645 [], IIC_SSE_PALIGNRM>, Sched<[WriteShuffleLd, ReadAfterLd]>; 5646 } 5647} 5648 5649multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { 5650 let neverHasSideEffects = 1 in { 5651 def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), 5652 (ins VR256:$src1, VR256:$src2, i8imm:$src3), 5653 !strconcat(asm, 5654 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5655 []>, Sched<[WriteShuffle]>; 5656 let mayLoad = 1 in 5657 def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), 5658 (ins VR256:$src1, i256mem:$src2, i8imm:$src3), 5659 !strconcat(asm, 5660 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5661 []>, Sched<[WriteShuffleLd, ReadAfterLd]>; 5662 } 5663} 5664 5665let Predicates = [HasAVX] in 5666 defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V; 5667let Predicates = [HasAVX2] in 5668 defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L; 5669let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in 5670 defm PALIGN : ssse3_palignr<"palignr">; 5671 5672let Predicates = [HasAVX2] in { 5673def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), 5674 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; 5675def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), 5676 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; 5677def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), 5678 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; 5679def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), 5680 (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; 5681} 
5682 5683let Predicates = [HasAVX] in { 5684def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5685 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5686def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5687 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5688def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5689 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5690def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5691 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5692} 5693 5694let Predicates = [UseSSSE3] in { 5695def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5696 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5697def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5698 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5699def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5700 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5701def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), 5702 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 5703} 5704 5705//===---------------------------------------------------------------------===// 5706// SSSE3 - Thread synchronization 5707//===---------------------------------------------------------------------===// 5708 5709let SchedRW = [WriteSystem] in { 5710let usesCustomInserter = 1 in { 5711def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), 5712 [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, 5713 Requires<[HasSSE3]>; 5714} 5715 5716let Uses = [EAX, ECX, EDX] in 5717def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>, 5718 TB, Requires<[HasSSE3]>; 5719let Uses = [ECX, EAX] in 5720def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", 5721 [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, 5722 TB, Requires<[HasSSE3]>; 5723} // SchedRW 5724 5725def : 
InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>; 5726def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>; 5727 5728def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>, 5729 Requires<[Not64BitMode]>; 5730def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>, 5731 Requires<[In64BitMode]>; 5732 5733//===----------------------------------------------------------------------===// 5734// SSE4.1 - Packed Move with Sign/Zero Extend 5735//===----------------------------------------------------------------------===// 5736 5737multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId, 5738 OpndItins itins = DEFAULT_ITINS> { 5739 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5740 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5741 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, 5742 Sched<[itins.Sched]>; 5743 5744 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 5745 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5746 [(set VR128:$dst, 5747 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], 5748 itins.rm>, Sched<[itins.Sched.Folded]>; 5749} 5750 5751multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr, 5752 Intrinsic IntId, X86FoldableSchedWrite Sched> { 5753 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 5754 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5755 [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>; 5756 5757 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), 5758 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5759 [(set VR256:$dst, (IntId (load addr:$src)))]>, 5760 Sched<[Sched.Folded]>; 5761} 5762 5763let Predicates = [HasAVX] in { 5764defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", 5765 int_x86_sse41_pmovsxbw, 5766 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5767defm VPMOVSXWD : 
SS41I_binop_rm_int8<0x23, "vpmovsxwd", 5768 int_x86_sse41_pmovsxwd, 5769 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5770defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", 5771 int_x86_sse41_pmovsxdq, 5772 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5773defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", 5774 int_x86_sse41_pmovzxbw, 5775 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5776defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", 5777 int_x86_sse41_pmovzxwd, 5778 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5779defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", 5780 int_x86_sse41_pmovzxdq, 5781 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5782} 5783 5784let Predicates = [HasAVX2] in { 5785defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw", 5786 int_x86_avx2_pmovsxbw, 5787 WriteShuffle>, VEX, VEX_L; 5788defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd", 5789 int_x86_avx2_pmovsxwd, 5790 WriteShuffle>, VEX, VEX_L; 5791defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq", 5792 int_x86_avx2_pmovsxdq, 5793 WriteShuffle>, VEX, VEX_L; 5794defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw", 5795 int_x86_avx2_pmovzxbw, 5796 WriteShuffle>, VEX, VEX_L; 5797defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd", 5798 int_x86_avx2_pmovzxwd, 5799 WriteShuffle>, VEX, VEX_L; 5800defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", 5801 int_x86_avx2_pmovzxdq, 5802 WriteShuffle>, VEX, VEX_L; 5803} 5804 5805defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw, 5806 SSE_INTALU_ITINS_SHUFF_P>; 5807defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd, 5808 SSE_INTALU_ITINS_SHUFF_P>; 5809defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq, 5810 SSE_INTALU_ITINS_SHUFF_P>; 5811defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw, 5812 SSE_INTALU_ITINS_SHUFF_P>; 5813defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd, 5814 SSE_INTALU_ITINS_SHUFF_P>; 5815defm 
PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq, 5816 SSE_INTALU_ITINS_SHUFF_P>; 5817 5818let Predicates = [HasAVX] in { 5819 // Common patterns involving scalar load. 5820 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), 5821 (VPMOVSXBWrm addr:$src)>; 5822 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), 5823 (VPMOVSXBWrm addr:$src)>; 5824 def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), 5825 (VPMOVSXBWrm addr:$src)>; 5826 5827 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), 5828 (VPMOVSXWDrm addr:$src)>; 5829 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), 5830 (VPMOVSXWDrm addr:$src)>; 5831 def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), 5832 (VPMOVSXWDrm addr:$src)>; 5833 5834 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), 5835 (VPMOVSXDQrm addr:$src)>; 5836 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), 5837 (VPMOVSXDQrm addr:$src)>; 5838 def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), 5839 (VPMOVSXDQrm addr:$src)>; 5840 5841 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), 5842 (VPMOVZXBWrm addr:$src)>; 5843 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), 5844 (VPMOVZXBWrm addr:$src)>; 5845 def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), 5846 (VPMOVZXBWrm addr:$src)>; 5847 5848 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), 5849 (VPMOVZXWDrm addr:$src)>; 5850 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), 5851 (VPMOVZXWDrm addr:$src)>; 5852 def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), 5853 (VPMOVZXWDrm addr:$src)>; 5854 5855 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), 5856 (VPMOVZXDQrm addr:$src)>; 5857 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), 5858 (VPMOVZXDQrm addr:$src)>; 5859 def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), 5860 (VPMOVZXDQrm 
addr:$src)>; 5861} 5862 5863let Predicates = [UseSSE41] in { 5864 // Common patterns involving scalar load. 5865 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), 5866 (PMOVSXBWrm addr:$src)>; 5867 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), 5868 (PMOVSXBWrm addr:$src)>; 5869 def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), 5870 (PMOVSXBWrm addr:$src)>; 5871 5872 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), 5873 (PMOVSXWDrm addr:$src)>; 5874 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), 5875 (PMOVSXWDrm addr:$src)>; 5876 def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), 5877 (PMOVSXWDrm addr:$src)>; 5878 5879 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), 5880 (PMOVSXDQrm addr:$src)>; 5881 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), 5882 (PMOVSXDQrm addr:$src)>; 5883 def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), 5884 (PMOVSXDQrm addr:$src)>; 5885 5886 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), 5887 (PMOVZXBWrm addr:$src)>; 5888 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), 5889 (PMOVZXBWrm addr:$src)>; 5890 def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), 5891 (PMOVZXBWrm addr:$src)>; 5892 5893 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), 5894 (PMOVZXWDrm addr:$src)>; 5895 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), 5896 (PMOVZXWDrm addr:$src)>; 5897 def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), 5898 (PMOVZXWDrm addr:$src)>; 5899 5900 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), 5901 (PMOVZXDQrm addr:$src)>; 5902 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), 5903 (PMOVZXDQrm addr:$src)>; 5904 def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), 5905 (PMOVZXDQrm addr:$src)>; 5906} 5907 5908multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic 
IntId, 5909 OpndItins itins = DEFAULT_ITINS> { 5910 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5911 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5912 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, 5913 Sched<[itins.Sched]>; 5914 5915 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 5916 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5917 [(set VR128:$dst, 5918 (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], 5919 itins.rm>, Sched<[itins.Sched.Folded]>; 5920} 5921 5922multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr, 5923 Intrinsic IntId, X86FoldableSchedWrite Sched> { 5924 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 5925 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5926 [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>; 5927 5928 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src), 5929 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5930 [(set VR256:$dst, 5931 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>, 5932 Sched<[Sched.Folded]>; 5933} 5934 5935let Predicates = [HasAVX] in { 5936defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd, 5937 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5938defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq, 5939 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5940defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd, 5941 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5942defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq, 5943 DEFAULT_ITINS_SHUFFLESCHED>, VEX; 5944} 5945 5946let Predicates = [HasAVX2] in { 5947defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd", 5948 int_x86_avx2_pmovsxbd, WriteShuffle>, 5949 VEX, VEX_L; 5950defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq", 5951 int_x86_avx2_pmovsxwq, WriteShuffle>, 5952 VEX, VEX_L; 5953defm VPMOVZXBD : 
SS41I_binop_rm_int8_y<0x31, "vpmovzxbd", 5954 int_x86_avx2_pmovzxbd, WriteShuffle>, 5955 VEX, VEX_L; 5956defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", 5957 int_x86_avx2_pmovzxwq, WriteShuffle>, 5958 VEX, VEX_L; 5959} 5960 5961defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd, 5962 SSE_INTALU_ITINS_SHUFF_P>; 5963defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq, 5964 SSE_INTALU_ITINS_SHUFF_P>; 5965defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd, 5966 SSE_INTALU_ITINS_SHUFF_P>; 5967defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq, 5968 SSE_INTALU_ITINS_SHUFF_P>; 5969 5970let Predicates = [HasAVX] in { 5971 // Common patterns involving scalar load 5972 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), 5973 (VPMOVSXBDrm addr:$src)>; 5974 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), 5975 (VPMOVSXWQrm addr:$src)>; 5976 5977 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), 5978 (VPMOVZXBDrm addr:$src)>; 5979 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), 5980 (VPMOVZXWQrm addr:$src)>; 5981} 5982 5983let Predicates = [UseSSE41] in { 5984 // Common patterns involving scalar load 5985 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), 5986 (PMOVSXBDrm addr:$src)>; 5987 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), 5988 (PMOVSXWQrm addr:$src)>; 5989 5990 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), 5991 (PMOVZXBDrm addr:$src)>; 5992 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), 5993 (PMOVZXWQrm addr:$src)>; 5994} 5995 5996multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId, 5997 X86FoldableSchedWrite Sched> { 5998 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5999 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 6000 [(set VR128:$dst, (IntId VR128:$src))]>, Sched<[Sched]>; 6001 6002 // Expecting a 
i16 load any extended to i32 value.
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId (bitconvert
                   (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
                 Sched<[Sched.Folded]>;
}

/// SS41I_binop_rm_int4_y - 256-bit PMOV*X with a 4-element (32-bit) memory
/// source, e.g. VPMOVSXBQ ymm: four bytes sign-extended to four qwords.
multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId, X86FoldableSchedWrite Sched> {
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;

  // Expecting an i32 load (the pattern matches loadi32, i.e. a 4-byte load).
  // NOTE(review): the operand class is i16mem while the pattern folds a
  // 32-bit load — these look inconsistent; confirm the intended memory
  // operand width for the 256-bit forms.
  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (IntId (bitconvert
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
                 Sched<[Sched.Folded]>;
}

let Predicates = [HasAVX] in {
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq,
                                     WriteShuffle>, VEX;
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq,
                                     WriteShuffle>, VEX;
}
let Predicates = [HasAVX2] in {
defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", int_x86_avx2_pmovsxbq,
                                       WriteShuffle>, VEX, VEX_L;
defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", int_x86_avx2_pmovzxbq,
                                       WriteShuffle>, VEX, VEX_L;
}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq,
                                    WriteShuffle>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq,
                                    WriteShuffle>;

// Select 128-bit-source sign-extension patterns onto the AVX2 YMM forms.
let Predicates = [HasAVX2] in {
  def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
  def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
  def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;

  def 
: Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; 6047 def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>; 6048 6049 def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; 6050 6051 def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))), 6052 (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6053 def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))), 6054 (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6055 def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))), 6056 (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6057 6058 def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))), 6059 (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6060 def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))), 6061 (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6062 6063 def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))), 6064 (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6065 6066 def : Pat<(v8i32 (X86vsext (v8i16 (bitconvert (v2i64 (load addr:$src)))))), 6067 (VPMOVSXWDYrm addr:$src)>; 6068 def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))), 6069 (VPMOVSXDQYrm addr:$src)>; 6070 6071 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64 6072 (scalar_to_vector (loadi64 addr:$src))))))), 6073 (VPMOVSXBDYrm addr:$src)>; 6074 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64 6075 (scalar_to_vector (loadf64 addr:$src))))))), 6076 (VPMOVSXBDYrm addr:$src)>; 6077 6078 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64 6079 (scalar_to_vector (loadi64 addr:$src))))))), 6080 (VPMOVSXWQYrm addr:$src)>; 6081 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64 6082 (scalar_to_vector (loadf64 addr:$src))))))), 6083 (VPMOVSXWQYrm addr:$src)>; 6084 6085 def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32 6086 (scalar_to_vector (loadi32 addr:$src))))))), 6087 (VPMOVSXBQYrm addr:$src)>; 6088} 6089 6090let Predicates = [HasAVX] in { 6091 // Common patterns involving scalar load 6092 def : 
Pat<(int_x86_sse41_pmovsxbq 6093 (bitconvert (v4i32 (X86vzmovl 6094 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6095 (VPMOVSXBQrm addr:$src)>; 6096 6097 def : Pat<(int_x86_sse41_pmovzxbq 6098 (bitconvert (v4i32 (X86vzmovl 6099 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6100 (VPMOVZXBQrm addr:$src)>; 6101} 6102 6103let Predicates = [UseSSE41] in { 6104 def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; 6105 def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>; 6106 def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>; 6107 6108 def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; 6109 def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>; 6110 6111 def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; 6112 6113 // Common patterns involving scalar load 6114 def : Pat<(int_x86_sse41_pmovsxbq 6115 (bitconvert (v4i32 (X86vzmovl 6116 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6117 (PMOVSXBQrm addr:$src)>; 6118 6119 def : Pat<(int_x86_sse41_pmovzxbq 6120 (bitconvert (v4i32 (X86vzmovl 6121 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6122 (PMOVZXBQrm addr:$src)>; 6123 6124 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 6125 (scalar_to_vector (loadi64 addr:$src))))))), 6126 (PMOVSXWDrm addr:$src)>; 6127 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64 6128 (scalar_to_vector (loadf64 addr:$src))))))), 6129 (PMOVSXWDrm addr:$src)>; 6130 def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32 6131 (scalar_to_vector (loadi32 addr:$src))))))), 6132 (PMOVSXBDrm addr:$src)>; 6133 def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32 6134 (scalar_to_vector (loadi32 addr:$src))))))), 6135 (PMOVSXWQrm addr:$src)>; 6136 def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32 6137 (scalar_to_vector (extloadi32i16 addr:$src))))))), 6138 (PMOVSXBQrm addr:$src)>; 6139 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64 6140 
(scalar_to_vector (loadi64 addr:$src))))))), 6141 (PMOVSXDQrm addr:$src)>; 6142 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64 6143 (scalar_to_vector (loadf64 addr:$src))))))), 6144 (PMOVSXDQrm addr:$src)>; 6145 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64 6146 (scalar_to_vector (loadi64 addr:$src))))))), 6147 (PMOVSXBWrm addr:$src)>; 6148 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64 6149 (scalar_to_vector (loadf64 addr:$src))))))), 6150 (PMOVSXBWrm addr:$src)>; 6151} 6152 6153let Predicates = [HasAVX2] in { 6154 def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>; 6155 def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>; 6156 def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>; 6157 6158 def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>; 6159 def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>; 6160 6161 def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>; 6162 6163 def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))), 6164 (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6165 def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))), 6166 (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6167 def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))), 6168 (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6169 6170 def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))), 6171 (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6172 def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))), 6173 (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6174 6175 def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))), 6176 (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 6177} 6178 6179let Predicates = [HasAVX] in { 6180 def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>; 6181 def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>; 6182 def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), 
(VPMOVZXBQrr VR128:$src)>; 6183 6184 def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>; 6185 def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>; 6186 6187 def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>; 6188 6189 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 6190 (VPMOVZXBWrm addr:$src)>; 6191 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 6192 (VPMOVZXBWrm addr:$src)>; 6193 def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6194 (VPMOVZXBDrm addr:$src)>; 6195 def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))), 6196 (VPMOVZXBQrm addr:$src)>; 6197 6198 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 6199 (VPMOVZXWDrm addr:$src)>; 6200 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 6201 (VPMOVZXWDrm addr:$src)>; 6202 def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6203 (VPMOVZXWQrm addr:$src)>; 6204 6205 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 6206 (VPMOVZXDQrm addr:$src)>; 6207 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 6208 (VPMOVZXDQrm addr:$src)>; 6209 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), 6210 (VPMOVZXDQrm addr:$src)>; 6211 6212 def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; 6213 def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>; 6214 def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>; 6215 6216 def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; 6217 def : Pat<(v2i64 (X86vsext (v8i16 
VR128:$src))), (VPMOVSXWQrr VR128:$src)>; 6218 6219 def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; 6220 6221 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 6222 (scalar_to_vector (loadi64 addr:$src))))))), 6223 (VPMOVSXWDrm addr:$src)>; 6224 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64 6225 (scalar_to_vector (loadi64 addr:$src))))))), 6226 (VPMOVSXDQrm addr:$src)>; 6227 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64 6228 (scalar_to_vector (loadf64 addr:$src))))))), 6229 (VPMOVSXWDrm addr:$src)>; 6230 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64 6231 (scalar_to_vector (loadf64 addr:$src))))))), 6232 (VPMOVSXDQrm addr:$src)>; 6233 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64 6234 (scalar_to_vector (loadi64 addr:$src))))))), 6235 (VPMOVSXBWrm addr:$src)>; 6236 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64 6237 (scalar_to_vector (loadf64 addr:$src))))))), 6238 (VPMOVSXBWrm addr:$src)>; 6239 6240 def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32 6241 (scalar_to_vector (loadi32 addr:$src))))))), 6242 (VPMOVSXBDrm addr:$src)>; 6243 def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32 6244 (scalar_to_vector (loadi32 addr:$src))))))), 6245 (VPMOVSXWQrm addr:$src)>; 6246 def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32 6247 (scalar_to_vector (extloadi32i16 addr:$src))))))), 6248 (VPMOVSXBQrm addr:$src)>; 6249} 6250 6251let Predicates = [UseSSE41] in { 6252 def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>; 6253 def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>; 6254 def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>; 6255 6256 def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>; 6257 def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>; 6258 6259 def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>; 6260 6261 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 
(scalar_to_vector (loadi64 addr:$src))))))), 6262 (PMOVZXBWrm addr:$src)>; 6263 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 6264 (PMOVZXBWrm addr:$src)>; 6265 def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6266 (PMOVZXBDrm addr:$src)>; 6267 def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))), 6268 (PMOVZXBQrm addr:$src)>; 6269 6270 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 6271 (PMOVZXWDrm addr:$src)>; 6272 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 6273 (PMOVZXWDrm addr:$src)>; 6274 def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 6275 (PMOVZXWQrm addr:$src)>; 6276 6277 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 6278 (PMOVZXDQrm addr:$src)>; 6279 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 6280 (PMOVZXDQrm addr:$src)>; 6281 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), 6282 (PMOVZXDQrm addr:$src)>; 6283} 6284 6285//===----------------------------------------------------------------------===// 6286// SSE4.1 - Extract Instructions 6287//===----------------------------------------------------------------------===// 6288 6289/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem 6290multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { 6291 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), 6292 (ins VR128:$src1, i32i8imm:$src2), 6293 !strconcat(OpcodeStr, 6294 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6295 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1), 6296 imm:$src2))]>, 6297 Sched<[WriteShuffle]>; 6298 let neverHasSideEffects = 1, mayStore = 1, 6299 SchedRW = 
[WriteShuffleLd, WriteRMW] in 6300 def mr : SS4AIi8<opc, MRMDestMem, (outs), 6301 (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2), 6302 !strconcat(OpcodeStr, 6303 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6304 [(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1), 6305 imm:$src2)))), addr:$dst)]>; 6306} 6307 6308let Predicates = [HasAVX] in 6309 defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; 6310 6311defm PEXTRB : SS41I_extract8<0x14, "pextrb">; 6312 6313 6314/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination 6315multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { 6316 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in 6317 def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), 6318 (ins VR128:$src1, i32i8imm:$src2), 6319 !strconcat(OpcodeStr, 6320 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6321 []>, Sched<[WriteShuffle]>; 6322 6323 let neverHasSideEffects = 1, mayStore = 1, 6324 SchedRW = [WriteShuffleLd, WriteRMW] in 6325 def mr : SS4AIi8<opc, MRMDestMem, (outs), 6326 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2), 6327 !strconcat(OpcodeStr, 6328 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6329 [(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1), 6330 imm:$src2)))), addr:$dst)]>; 6331} 6332 6333let Predicates = [HasAVX] in 6334 defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; 6335 6336defm PEXTRW : SS41I_extract16<0x15, "pextrw">; 6337 6338 6339/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination 6340multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> { 6341 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), 6342 (ins VR128:$src1, i32i8imm:$src2), 6343 !strconcat(OpcodeStr, 6344 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6345 [(set GR32:$dst, 6346 (extractelt (v4i32 VR128:$src1), imm:$src2))]>, 6347 Sched<[WriteShuffle]>; 6348 let SchedRW = [WriteShuffleLd, WriteRMW] in 6349 def mr : SS4AIi8<opc, MRMDestMem, (outs), 6350 (ins i32mem:$dst, 
VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                          addr:$dst)]>;
}

let Predicates = [HasAVX] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
// (REX.W-prefixed form of the 0x16 opcode; selects a v2i64 lane into GR64
// or stores it to a 64-bit memory location.)
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
                 Sched<[WriteShuffle]>, REX_W;
  // Memory-destination form: store + RMW scheduling.
  let SchedRW = [WriteShuffleLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                          addr:$dst)]>, REX_W;
}

let Predicates = [HasAVX] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
                            OpndItins itins = DEFAULT_ITINS> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst,
                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))],
                    itins.rr>, Sched<[WriteFBlend]>;
  let SchedRW = [WriteFBlendLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, 
$src2}"), 6401 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2), 6402 addr:$dst)], itins.rm>; 6403} 6404 6405let ExeDomain = SSEPackedSingle in { 6406 let Predicates = [UseAVX] in 6407 defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; 6408 defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>; 6409} 6410 6411// Also match an EXTRACTPS store when the store is done as f32 instead of i32. 6412def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), 6413 imm:$src2))), 6414 addr:$dst), 6415 (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, 6416 Requires<[HasAVX]>; 6417def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), 6418 imm:$src2))), 6419 addr:$dst), 6420 (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, 6421 Requires<[UseSSE41]>; 6422 6423//===----------------------------------------------------------------------===// 6424// SSE4.1 - Insert Instructions 6425//===----------------------------------------------------------------------===// 6426 6427multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { 6428 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 6429 (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3), 6430 !if(Is2Addr, 6431 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6432 !strconcat(asm, 6433 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6434 [(set VR128:$dst, 6435 (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, 6436 Sched<[WriteShuffle]>; 6437 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 6438 (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3), 6439 !if(Is2Addr, 6440 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6441 !strconcat(asm, 6442 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6443 [(set VR128:$dst, 6444 (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), 6445 imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; 6446} 6447 6448let Predicates = [HasAVX] in 6449 defm VPINSRB : 
SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; 6450let Constraints = "$src1 = $dst" in 6451 defm PINSRB : SS41I_insert8<0x20, "pinsrb">; 6452 6453multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { 6454 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 6455 (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), 6456 !if(Is2Addr, 6457 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6458 !strconcat(asm, 6459 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6460 [(set VR128:$dst, 6461 (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, 6462 Sched<[WriteShuffle]>; 6463 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 6464 (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3), 6465 !if(Is2Addr, 6466 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6467 !strconcat(asm, 6468 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6469 [(set VR128:$dst, 6470 (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), 6471 imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; 6472} 6473 6474let Predicates = [HasAVX] in 6475 defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; 6476let Constraints = "$src1 = $dst" in 6477 defm PINSRD : SS41I_insert32<0x22, "pinsrd">; 6478 6479multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { 6480 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 6481 (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), 6482 !if(Is2Addr, 6483 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6484 !strconcat(asm, 6485 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6486 [(set VR128:$dst, 6487 (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, 6488 Sched<[WriteShuffle]>; 6489 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 6490 (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3), 6491 !if(Is2Addr, 6492 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6493 !strconcat(asm, 6494 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6495 [(set 
VR128:$dst, 6496 (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), 6497 imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; 6498} 6499 6500let Predicates = [HasAVX] in 6501 defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; 6502let Constraints = "$src1 = $dst" in 6503 defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; 6504 6505// insertps has a few different modes, there's the first two here below which 6506// are optimized inserts that won't zero arbitrary elements in the destination 6507// vector. The next one matches the intrinsic and could zero arbitrary elements 6508// in the target vector. 6509multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1, 6510 OpndItins itins = DEFAULT_ITINS> { 6511 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 6512 (ins VR128:$src1, VR128:$src2, u32u8imm:$src3), 6513 !if(Is2Addr, 6514 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6515 !strconcat(asm, 6516 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6517 [(set VR128:$dst, 6518 (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>, 6519 Sched<[WriteFShuffle]>; 6520 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 6521 (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3), 6522 !if(Is2Addr, 6523 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6524 !strconcat(asm, 6525 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6526 [(set VR128:$dst, 6527 (X86insrtps VR128:$src1, 6528 (v4f32 (scalar_to_vector (loadf32 addr:$src2))), 6529 imm:$src3))], itins.rm>, 6530 Sched<[WriteFShuffleLd, ReadAfterLd]>; 6531} 6532 6533let ExeDomain = SSEPackedSingle in { 6534 let Predicates = [UseAVX] in 6535 defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; 6536 let Constraints = "$src1 = $dst" in 6537 defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>; 6538} 6539 6540//===----------------------------------------------------------------------===// 6541// SSE4.1 - Round 
Instructions 6542//===----------------------------------------------------------------------===// 6543 6544multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, 6545 X86MemOperand x86memop, RegisterClass RC, 6546 PatFrag mem_frag32, PatFrag mem_frag64, 6547 Intrinsic V4F32Int, Intrinsic V2F64Int> { 6548let ExeDomain = SSEPackedSingle in { 6549 // Intrinsic operation, reg. 6550 // Vector intrinsic operation, reg 6551 def PSr : SS4AIi8<opcps, MRMSrcReg, 6552 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), 6553 !strconcat(OpcodeStr, 6554 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6555 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))], 6556 IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>; 6557 6558 // Vector intrinsic operation, mem 6559 def PSm : SS4AIi8<opcps, MRMSrcMem, 6560 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), 6561 !strconcat(OpcodeStr, 6562 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6563 [(set RC:$dst, 6564 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))], 6565 IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>; 6566} // ExeDomain = SSEPackedSingle 6567 6568let ExeDomain = SSEPackedDouble in { 6569 // Vector intrinsic operation, reg 6570 def PDr : SS4AIi8<opcpd, MRMSrcReg, 6571 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), 6572 !strconcat(OpcodeStr, 6573 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6574 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))], 6575 IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>; 6576 6577 // Vector intrinsic operation, mem 6578 def PDm : SS4AIi8<opcpd, MRMSrcMem, 6579 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), 6580 !strconcat(OpcodeStr, 6581 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6582 [(set RC:$dst, 6583 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))], 6584 IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAddLd]>; 6585} // ExeDomain = SSEPackedDouble 6586} 6587 6588multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, 6589 string OpcodeStr, 6590 Intrinsic F32Int, 6591 Intrinsic F64Int, bit Is2Addr = 
1> { 6592let ExeDomain = GenericDomain in { 6593 // Operation, reg. 6594 let hasSideEffects = 0 in 6595 def SSr : SS4AIi8<opcss, MRMSrcReg, 6596 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3), 6597 !if(Is2Addr, 6598 !strconcat(OpcodeStr, 6599 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6600 !strconcat(OpcodeStr, 6601 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6602 []>, Sched<[WriteFAdd]>; 6603 6604 // Intrinsic operation, reg. 6605 let isCodeGenOnly = 1 in 6606 def SSr_Int : SS4AIi8<opcss, MRMSrcReg, 6607 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), 6608 !if(Is2Addr, 6609 !strconcat(OpcodeStr, 6610 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6611 !strconcat(OpcodeStr, 6612 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6613 [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, 6614 Sched<[WriteFAdd]>; 6615 6616 // Intrinsic operation, mem. 6617 def SSm : SS4AIi8<opcss, MRMSrcMem, 6618 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), 6619 !if(Is2Addr, 6620 !strconcat(OpcodeStr, 6621 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6622 !strconcat(OpcodeStr, 6623 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6624 [(set VR128:$dst, 6625 (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, 6626 Sched<[WriteFAddLd, ReadAfterLd]>; 6627 6628 // Operation, reg. 6629 let hasSideEffects = 0 in 6630 def SDr : SS4AIi8<opcsd, MRMSrcReg, 6631 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3), 6632 !if(Is2Addr, 6633 !strconcat(OpcodeStr, 6634 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6635 !strconcat(OpcodeStr, 6636 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6637 []>, Sched<[WriteFAdd]>; 6638 6639 // Intrinsic operation, reg. 
6640 let isCodeGenOnly = 1 in 6641 def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, 6642 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), 6643 !if(Is2Addr, 6644 !strconcat(OpcodeStr, 6645 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6646 !strconcat(OpcodeStr, 6647 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6648 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, 6649 Sched<[WriteFAdd]>; 6650 6651 // Intrinsic operation, mem. 6652 def SDm : SS4AIi8<opcsd, MRMSrcMem, 6653 (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), 6654 !if(Is2Addr, 6655 !strconcat(OpcodeStr, 6656 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6657 !strconcat(OpcodeStr, 6658 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6659 [(set VR128:$dst, 6660 (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, 6661 Sched<[WriteFAddLd, ReadAfterLd]>; 6662} // ExeDomain = GenericDomain 6663} 6664 6665// FP round - roundss, roundps, roundsd, roundpd 6666let Predicates = [HasAVX] in { 6667 // Intrinsic form 6668 defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128, 6669 loadv4f32, loadv2f64, 6670 int_x86_sse41_round_ps, 6671 int_x86_sse41_round_pd>, VEX; 6672 defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256, 6673 loadv8f32, loadv4f64, 6674 int_x86_avx_round_ps_256, 6675 int_x86_avx_round_pd_256>, VEX, VEX_L; 6676 defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", 6677 int_x86_sse41_round_ss, 6678 int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; 6679 6680 def : Pat<(ffloor FR32:$src), 6681 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; 6682 def : Pat<(f64 (ffloor FR64:$src)), 6683 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; 6684 def : Pat<(f32 (fnearbyint FR32:$src)), 6685 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; 6686 def : Pat<(f64 (fnearbyint FR64:$src)), 6687 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; 6688 def : Pat<(f32 (fceil 
FR32:$src)), 6689 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; 6690 def : Pat<(f64 (fceil FR64:$src)), 6691 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; 6692 def : Pat<(f32 (frint FR32:$src)), 6693 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; 6694 def : Pat<(f64 (frint FR64:$src)), 6695 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; 6696 def : Pat<(f32 (ftrunc FR32:$src)), 6697 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; 6698 def : Pat<(f64 (ftrunc FR64:$src)), 6699 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; 6700 6701 def : Pat<(v4f32 (ffloor VR128:$src)), 6702 (VROUNDPSr VR128:$src, (i32 0x1))>; 6703 def : Pat<(v4f32 (fnearbyint VR128:$src)), 6704 (VROUNDPSr VR128:$src, (i32 0xC))>; 6705 def : Pat<(v4f32 (fceil VR128:$src)), 6706 (VROUNDPSr VR128:$src, (i32 0x2))>; 6707 def : Pat<(v4f32 (frint VR128:$src)), 6708 (VROUNDPSr VR128:$src, (i32 0x4))>; 6709 def : Pat<(v4f32 (ftrunc VR128:$src)), 6710 (VROUNDPSr VR128:$src, (i32 0x3))>; 6711 6712 def : Pat<(v2f64 (ffloor VR128:$src)), 6713 (VROUNDPDr VR128:$src, (i32 0x1))>; 6714 def : Pat<(v2f64 (fnearbyint VR128:$src)), 6715 (VROUNDPDr VR128:$src, (i32 0xC))>; 6716 def : Pat<(v2f64 (fceil VR128:$src)), 6717 (VROUNDPDr VR128:$src, (i32 0x2))>; 6718 def : Pat<(v2f64 (frint VR128:$src)), 6719 (VROUNDPDr VR128:$src, (i32 0x4))>; 6720 def : Pat<(v2f64 (ftrunc VR128:$src)), 6721 (VROUNDPDr VR128:$src, (i32 0x3))>; 6722 6723 def : Pat<(v8f32 (ffloor VR256:$src)), 6724 (VROUNDYPSr VR256:$src, (i32 0x1))>; 6725 def : Pat<(v8f32 (fnearbyint VR256:$src)), 6726 (VROUNDYPSr VR256:$src, (i32 0xC))>; 6727 def : Pat<(v8f32 (fceil VR256:$src)), 6728 (VROUNDYPSr VR256:$src, (i32 0x2))>; 6729 def : Pat<(v8f32 (frint VR256:$src)), 6730 (VROUNDYPSr VR256:$src, (i32 0x4))>; 6731 def : Pat<(v8f32 (ftrunc VR256:$src)), 6732 (VROUNDYPSr VR256:$src, (i32 0x3))>; 6733 6734 def : Pat<(v4f64 (ffloor VR256:$src)), 6735 (VROUNDYPDr VR256:$src, (i32 0x1))>; 6736 def : 
Pat<(v4f64 (fnearbyint VR256:$src)),
          (VROUNDYPDr VR256:$src, (i32 0xC))>;
  def : Pat<(v4f64 (fceil VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x2))>;
  def : Pat<(v4f64 (frint VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x4))>;
  def : Pat<(v4f64 (ftrunc VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x3))>;
}

defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
                               memopv4f32, memopv2f64,
                               int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
let Constraints = "$src1 = $dst" in
defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
                                int_x86_sse41_round_ss, int_x86_sse41_round_sd>;

// Select ROUND* for the FP rounding ISD nodes.  The immediate picks the
// rounding behavior; the mapping used throughout these patterns is:
//   0x1 -> ffloor, 0x2 -> fceil, 0x3 -> ftrunc, 0x4 -> frint,
//   0xC -> fnearbyint
// Scalar forms take an IMPLICIT_DEF for the pass-through operand since only
// the low element result is used.
let Predicates = [UseSSE41] in {
  def : Pat<(ffloor FR32:$src),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
  def : Pat<(f32 (fnearbyint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;

  def : Pat<(v4f32 (ffloor VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x1))>;
  def : Pat<(v4f32 (fnearbyint VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0xC))>;
  def : Pat<(v4f32 (fceil VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x2))>;
  def : Pat<(v4f32 (frint VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x4))>;
  def : Pat<(v4f32 (ftrunc VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x3))>;

  def : Pat<(v2f64 (ffloor VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (fnearbyint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0xC))>;
  def : Pat<(v2f64 (fceil VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x2))>;
  def : Pat<(v2f64 (frint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x4))>;
  def : Pat<(v2f64 (ftrunc VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x3))>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the intel intrinsic that corresponds to this.
// All forms only produce EFLAGS (no register result).
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX;
def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;

def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L;
}

// NOTE(review): PTESTrm/VPTESTrm use f128mem although the load fragment is an
// integer v2i64 — confirm whether these should be i128mem for consistency
// with the integer domain of ptest.
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
              Sched<[WriteVecLogic]>;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
              Sched<[WriteVecLogicLd, ReadAfterLd]>;
}

// The bit test instructions below are AVX only
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
            Sched<[WriteVecLogic]>, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
            Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
}

let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
                VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
                VEX_L;
}
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

// POPCNT: population count, selected from ISD::CTPOP; also sets EFLAGS.
// NOTE(review): scheduled as WriteFAdd/WriteFAddLd — there is no dedicated
// sched class for popcnt here; confirm this placeholder matches the intent.
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>,
                     OpSize16, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
                      Sched<[WriteFAddLd]>, OpSize16, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>,
                     OpSize32, XS;

  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
                      Sched<[WriteFAddLd]>, OpSize32, XS;

  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
                      IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>, XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
                       Sched<[WriteFAddLd]>, XS;
}



// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
// Unary VR128 -> VR128 intrinsic op; the memory form folds a 128-bit load
// (bitconverted from v2i64) into the operation.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128,
                                 X86FoldableSchedWrite Sched> {
  def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    Sched<[Sched]>;
  def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128 (bitconvert (memopv2i64 addr:$src))))]>,
                    Sched<[Sched.Folded]>;
}

// PHMIN has the same profile as PSAD, thus we use the same scheduling
// model, although the naming is misleading.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
                                          int_x86_sse41_phminposuw,
                                          WriteVecIMul>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
                                         int_x86_sse41_phminposuw,
                                         WriteVecIMul>;

/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
/// Is2Addr selects the tied two-operand (SSE) vs. three-operand (VEX) asm
/// string form.
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId128, bit Is2Addr = 1,
                              OpndItins itins = DEFAULT_ITINS> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
       itins.rr>, Sched<[itins.Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))],
       itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator (256-bit AVX2 form;
/// always three-operand VEX asm syntax).
multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId256,
                                X86FoldableSchedWrite Sched> {
  let isCommutable = 1 in
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
       Sched<[Sched]>;
  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>,
       Sched<[Sched.Folded, ReadAfterLd]>;
}


/// SS48I_binop_rm - Simple SSE41 binary operator, selected from a plain
/// SDNode (not an intrinsic); parameterized over vector type, register
/// class and load fragment so it covers both 128- and 256-bit forms.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1,
                          OpndItins itins = SSE_INTALU_ITINS_P> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[itins.Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  let isCommutable = 0 in
  defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
                                      0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V;
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, 6998 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6999 VEX_4V; 7000 defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128, 7001 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 7002 VEX_4V; 7003 defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128, 7004 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 7005 VEX_4V; 7006 defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128, 7007 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 7008 VEX_4V; 7009 defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128, 7010 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 7011 VEX_4V; 7012 defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128, 7013 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 7014 VEX_4V; 7015 defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128, 7016 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 7017 VEX_4V; 7018 defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, 7019 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 7020 VEX_4V; 7021 defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, 7022 0, DEFAULT_ITINS_VECIMULSCHED>, VEX_4V; 7023} 7024 7025let Predicates = [HasAVX2] in { 7026 let isCommutable = 0 in 7027 defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", 7028 int_x86_avx2_packusdw, WriteShuffle>, 7029 VEX_4V, VEX_L; 7030 defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, 7031 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7032 VEX_4V, VEX_L; 7033 defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256, 7034 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7035 VEX_4V, VEX_L; 7036 defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256, 7037 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7038 VEX_4V, VEX_L; 7039 defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256, 7040 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7041 VEX_4V, VEX_L; 
7042 defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256, 7043 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7044 VEX_4V, VEX_L; 7045 defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256, 7046 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7047 VEX_4V, VEX_L; 7048 defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256, 7049 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7050 VEX_4V, VEX_L; 7051 defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256, 7052 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, 7053 VEX_4V, VEX_L; 7054 defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", 7055 int_x86_avx2_pmul_dq, WriteVecIMul>, 7056 VEX_4V, VEX_L; 7057} 7058 7059let Constraints = "$src1 = $dst" in { 7060 let isCommutable = 0 in 7061 defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw, 7062 1, DEFAULT_ITINS_SHUFFLESCHED>; 7063 defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, 7064 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7065 defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, 7066 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7067 defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, 7068 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7069 defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, 7070 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7071 defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, 7072 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7073 defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, 7074 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7075 defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, 7076 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7077 defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, 7078 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; 7079 defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 7080 1, SSE_INTMUL_ITINS_P>; 7081} 

// NOTE(review): the AVX forms below use the alignment-restricted memopv2i64 /
// memopv4i64 fragments; sibling AVX ops in this file use loadv2i64/loadv4i64
// (AVX permits unaligned load folding) — confirm whether these should match.
let Predicates = [HasAVX] in {
  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
                                 memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V;
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V;
}
let Predicates = [HasAVX2] in {
  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                  memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
                                memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
/// (blend/dp/mpsadbw-style ops); Is2Addr selects tied SSE vs. VEX asm form.
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop, bit Is2Addr = 1,
                 OpndItins itins = DEFAULT_ITINS> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
        Sched<[itins.Sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (IntId RC:$src1,
           (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
    let ExeDomain = SSEPackedSingle in {
    defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
                                        VR128, loadv4f32, f128mem, 0,
                                        DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
    defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
                                         int_x86_avx_blend_ps_256, VR256,
                                         loadv8f32, f256mem, 0,
                                         DEFAULT_ITINS_FBLENDSCHED>,
                                         VEX_4V, VEX_L;
    }
    let ExeDomain = SSEPackedDouble in {
    defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
                                        VR128, loadv2f64, f128mem, 0,
                                        DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
    defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
                                         int_x86_avx_blend_pd_256,VR256,
                                         loadv4f64, f256mem, 0,
                                         DEFAULT_ITINS_FBLENDSCHED>,
                                         VEX_4V, VEX_L;
    }
  defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
                                      VR128, loadv2i64, i128mem, 0,
                                      DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                      VR128, loadv2i64, i128mem, 0,
                                      DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
  }
  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, loadv4f32, f128mem, 0,
                                   SSE_DPPS_ITINS>, VEX_4V;
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, loadv2f64, f128mem, 0,
                                   SSE_DPPS_ITINS>, VEX_4V;
  // Fixed: memory operand was i256mem, but vdpps is a packed-float op and
  // already loads through loadv8f32; use f256mem like VDPPS/VBLENDPSY above.
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, loadv8f32, f256mem, 0,
                                    SSE_DPPS_ITINS>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
  defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
                                       VR256, loadv4i64, i256mem, 0,
                                       DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                       VR256, loadv4i64, i256mem, 0,
                                       DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
  }
}

// Legacy SSE4.1 tied-operand immediate blends.
let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
  let ExeDomain = SSEPackedSingle in
  defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
                                     VR128, memopv4f32, f128mem,
                                     1, SSE_INTALU_ITINS_FBLEND_P>;
  let ExeDomain = SSEPackedDouble in
  defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
                                     VR128, memopv2f64, f128mem,
                                     1, SSE_INTALU_ITINS_FBLEND_P>;
  defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
                                     VR128, memopv2i64, i128mem,
                                     1, SSE_INTALU_ITINS_FBLEND_P>;
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memopv2i64, i128mem,
                                     1, SSE_MPSADBW_ITINS>;
  }
  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memopv4f32, f128mem, 1,
                                  SSE_DPPS_ITINS>;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memopv2f64, f128mem, 1,
                                  SSE_DPPD_ITINS>;
}

/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
/// (variable blends: the fourth operand is the mask register, encoded in the
/// immediate byte via VEX_I8IMM).
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                                    RegisterClass RC, X86MemOperand x86memop,
                                    PatFrag mem_frag, Intrinsic IntId,
                                    X86FoldableSchedWrite Sched> {
  def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
                  NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
                Sched<[Sched]>;

  def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                        (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                               RC:$src3))],
                  NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
                Sched<[Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
                                           loadv2f64, int_x86_sse41_blendvpd,
                                           WriteFVarBlend>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
                                           loadv4f64, int_x86_avx_blendv_pd_256,
                                           WriteFVarBlend>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
                                           loadv4f32, int_x86_sse41_blendvps,
                                           WriteFVarBlend>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
                                           loadv8f32, int_x86_avx_blendv_ps_256,
                                           WriteFVarBlend>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                           loadv2i64, int_x86_sse41_pblendvb,
                                           WriteVarBlend>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
                                           loadv4i64, int_x86_avx2_pblendvb,
                                           WriteVarBlend>, VEX_L;
}

// vselect -> variable blend.  Note the operand swap: blendv selects from
// the SECOND source where the mask bit is set, so src1/src2 are exchanged.
let Predicates = [HasAVX] in {
  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                            (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
                            (v8f32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                            (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                            (v4f64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;

  def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
                              (imm:$mask))),
            (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
  def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
                              (imm:$mask))),
            (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;

  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                              (imm:$mask))),
            (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                              (imm:$mask))),
            (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                              (imm:$mask))),
            (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}

let Predicates = [HasAVX2] in {
  def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                            (v32i8 VR256:$src2))),
            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
                               (imm:$mask))),
            (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}

/// SS41I_ternary_int - SSE 4.1 ternary operator
/// Legacy variable blends: the mask is implicitly XMM0 (modeled via Uses).
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                               X86MemOperand x86memop, Intrinsic IntId,
                               OpndItins itins = DEFAULT_ITINS> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
                    itins.rr>;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst,
                      (IntId VR128:$src1,
                       (bitconvert (mem_frag addr:$src2)), XMM0))],
                    itins.rm>;
  }
}

let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
                                  int_x86_sse41_blendvpd>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
                                  int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
                                  int_x86_sse41_pblendvb>;

// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;

// vselect with the mask in XMM0 -> legacy blendv (operands swapped, as for
// the VEX patterns above).
let Predicates = [UseSSE41] in {
  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;

  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                              (imm:$mask))),
            (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                              (imm:$mask))),
            (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                              (imm:$mask))),
            (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;

}

// Non-temporal aligned loads (movntdqa), exposed only via intrinsics.
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
                        VEX;
let Predicates = [HasAVX2] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
                         VEX, VEX_L;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm - Simple SSE 4.2 binary operator
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>;
  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>;
}

let Predicates = [HasAVX] in
  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
                                 loadv2i64, i128mem, 0>, VEX_4V;

let Predicates = [HasAVX2] in
  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
                                memopv2i64, i128mem>;

//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//
// Each pcmp[ei]str[im] op is modeled twice: a pattern-bearing pseudo that is
// expanded by a custom inserter (usesCustomInserter), plus the real
// pattern-less encoding used by the assembler/disassembler.

// Packed Compare Implicit Length Strings, Return Mask
multiclass pseudo_pcmpistrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
                                                  imm:$src3))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
                       (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
  defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
}

multiclass pcmpistrm_SS42AI<string asm> {
  def rr : SS42AI<0x62, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, Sched<[WritePCmpIStrM]>;
  let mayLoad = 1 in
  def rm :SS42AI<0x62, MRMSrcMem, (outs),
                 (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                 []>, Sched<[WritePCmpIStrMLd, ReadAfterLd]>;
}

let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
  defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ;
}

// Packed Compare Explicit Length Strings, Return Mask
// (explicit-length forms implicitly read lengths from EAX/EDX)
multiclass pseudo_pcmpestrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                       VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
                       (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
  defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpestrm<string asm> {
  def rr : SS42AI<0x60, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrM]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x60, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrMLd, ReadAfterLd]>;
}

let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
  defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">;
}

// Packed Compare Implicit Length Strings, Return Index
multiclass pseudo_pcmpistri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
                              (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
  defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpistri<string asm> {
  def rr : SS42AI<0x63, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, Sched<[WritePCmpIStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x63, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, Sched<[WritePCmpIStrILd, ReadAfterLd]>;
}

let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
  defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
}

// Packed Compare Explicit Length Strings, Return Index
multiclass pseudo_pcmpestri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
       imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
  defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpestri<string asm> {
  def rr : SS42AI<0x61, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x61, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrILd, ReadAfterLd]>;
}

let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
  defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents

// crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size
// of r and m.
// NOTE(review): scheduled as WriteFAdd/WriteFAddLd — no dedicated crc32
// sched class here; confirm this placeholder matches the intent.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                   RegisterClass RCIn, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>,
         Sched<[WriteFAdd]>;

class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                   X86MemOperand x86memop, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
         IIC_CRC32_MEM>, Sched<[WriteFAddLd, ReadAfterLd]>;

let Constraints = "$src1 = $dst" in {
  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  // 64-bit dest with 8-bit source: no intrinsic, encoding-only (null_frag).
  let hasSideEffects = 0 in {
    let mayLoad = 1 in
    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
                                  null_frag>, REX_W;
    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
                                  null_frag>, REX_W;
  }
}

//===----------------------------------------------------------------------===//
// SHA-NI Instructions
//===----------------------------------------------------------------------===//

// Two-source SHA op; when UsesXMM0 is set, the intrinsic additionally takes
// the implicit XMM0 operand (see SHA256RNDS2 below).
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;

  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8;
}

let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                            (i8 imm:$src3)))]>, TA;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                            (bc_v4i32 (memopv2i64 addr:$src2)),
                            (i8 imm:$src3)))]>, TA;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>;
  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>;
  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>;

  let Uses=[XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>;

defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>; 7686 defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>; 7687} 7688 7689// Aliases with explicit %xmm0 7690def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7691 (SHA256RNDS2rr VR128:$dst, VR128:$src2)>; 7692def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7693 (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>; 7694 7695//===----------------------------------------------------------------------===// 7696// AES-NI Instructions 7697//===----------------------------------------------------------------------===// 7698 7699multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, 7700 Intrinsic IntId128, bit Is2Addr = 1> { 7701 def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), 7702 (ins VR128:$src1, VR128:$src2), 7703 !if(Is2Addr, 7704 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7705 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7706 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, 7707 Sched<[WriteAESDecEnc]>; 7708 def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), 7709 (ins VR128:$src1, i128mem:$src2), 7710 !if(Is2Addr, 7711 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7712 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7713 [(set VR128:$dst, 7714 (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, 7715 Sched<[WriteAESDecEncLd, ReadAfterLd]>; 7716} 7717 7718// Perform One Round of an AES Encryption/Decryption Flow 7719let Predicates = [HasAVX, HasAES] in { 7720 defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", 7721 int_x86_aesni_aesenc, 0>, VEX_4V; 7722 defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", 7723 int_x86_aesni_aesenclast, 0>, VEX_4V; 7724 defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", 7725 int_x86_aesni_aesdec, 0>, VEX_4V; 7726 defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", 7727 int_x86_aesni_aesdeclast, 0>, VEX_4V; 7728} 7729 7730let 
Constraints = "$src1 = $dst" in { 7731 defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", 7732 int_x86_aesni_aesenc>; 7733 defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", 7734 int_x86_aesni_aesenclast>; 7735 defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", 7736 int_x86_aesni_aesdec>; 7737 defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", 7738 int_x86_aesni_aesdeclast>; 7739} 7740 7741// Perform the AES InvMixColumn Transformation 7742let Predicates = [HasAVX, HasAES] in { 7743 def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), 7744 (ins VR128:$src1), 7745 "vaesimc\t{$src1, $dst|$dst, $src1}", 7746 [(set VR128:$dst, 7747 (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>, 7748 VEX; 7749 def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), 7750 (ins i128mem:$src1), 7751 "vaesimc\t{$src1, $dst|$dst, $src1}", 7752 [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>, 7753 Sched<[WriteAESIMCLd]>, VEX; 7754} 7755def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), 7756 (ins VR128:$src1), 7757 "aesimc\t{$src1, $dst|$dst, $src1}", 7758 [(set VR128:$dst, 7759 (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>; 7760def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), 7761 (ins i128mem:$src1), 7762 "aesimc\t{$src1, $dst|$dst, $src1}", 7763 [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>, 7764 Sched<[WriteAESIMCLd]>; 7765 7766// AES Round Key Generation Assist 7767let Predicates = [HasAVX, HasAES] in { 7768 def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), 7769 (ins VR128:$src1, i8imm:$src2), 7770 "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7771 [(set VR128:$dst, 7772 (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, 7773 Sched<[WriteAESKeyGen]>, VEX; 7774 def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), 7775 (ins i128mem:$src1, i8imm:$src2), 7776 "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7777 [(set VR128:$dst, 
7778 (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>, 7779 Sched<[WriteAESKeyGenLd]>, VEX; 7780} 7781def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), 7782 (ins VR128:$src1, i8imm:$src2), 7783 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7784 [(set VR128:$dst, 7785 (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, 7786 Sched<[WriteAESKeyGen]>; 7787def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), 7788 (ins i128mem:$src1, i8imm:$src2), 7789 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7790 [(set VR128:$dst, 7791 (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, 7792 Sched<[WriteAESKeyGenLd]>; 7793 7794//===----------------------------------------------------------------------===// 7795// PCLMUL Instructions 7796//===----------------------------------------------------------------------===// 7797 7798// AVX carry-less Multiplication instructions 7799def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), 7800 (ins VR128:$src1, VR128:$src2, i8imm:$src3), 7801 "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7802 [(set VR128:$dst, 7803 (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, 7804 Sched<[WriteCLMul]>; 7805 7806def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), 7807 (ins VR128:$src1, i128mem:$src2, i8imm:$src3), 7808 "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7809 [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, 7810 (loadv2i64 addr:$src2), imm:$src3))]>, 7811 Sched<[WriteCLMulLd, ReadAfterLd]>; 7812 7813// Carry-less Multiplication instructions 7814let Constraints = "$src1 = $dst" in { 7815def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), 7816 (ins VR128:$src1, VR128:$src2, i8imm:$src3), 7817 "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7818 [(set VR128:$dst, 7819 (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], 7820 
IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>; 7821 7822def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), 7823 (ins VR128:$src1, i128mem:$src2, i8imm:$src3), 7824 "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7825 [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, 7826 (memopv2i64 addr:$src2), imm:$src3))], 7827 IIC_SSE_PCLMULQDQ_RM>, 7828 Sched<[WriteCLMulLd, ReadAfterLd]>; 7829} // Constraints = "$src1 = $dst" 7830 7831 7832multiclass pclmul_alias<string asm, int immop> { 7833 def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"), 7834 (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>; 7835 7836 def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"), 7837 (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>; 7838 7839 def : InstAlias<!strconcat("vpclmul", asm, 7840 "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), 7841 (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>; 7842 7843 def : InstAlias<!strconcat("vpclmul", asm, 7844 "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), 7845 (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>; 7846} 7847defm : pclmul_alias<"hqhq", 0x11>; 7848defm : pclmul_alias<"hqlq", 0x01>; 7849defm : pclmul_alias<"lqhq", 0x10>; 7850defm : pclmul_alias<"lqlq", 0x00>; 7851 7852//===----------------------------------------------------------------------===// 7853// SSE4A Instructions 7854//===----------------------------------------------------------------------===// 7855 7856let Predicates = [HasSSE4A] in { 7857 7858let Constraints = "$src = $dst" in { 7859def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), 7860 (ins VR128:$src, i8imm:$len, i8imm:$idx), 7861 "extrq\t{$idx, $len, $src|$src, $len, $idx}", 7862 [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, 7863 imm:$idx))]>, PD; 7864def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), 7865 (ins VR128:$src, VR128:$mask), 7866 "extrq\t{$mask, $src|$src, $mask}", 7867 [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, 7868 
VR128:$mask))]>, PD; 7869 7870def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), 7871 (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx), 7872 "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", 7873 [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src, 7874 VR128:$src2, imm:$len, imm:$idx))]>, XD; 7875def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), 7876 (ins VR128:$src, VR128:$mask), 7877 "insertq\t{$mask, $src|$src, $mask}", 7878 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, 7879 VR128:$mask))]>, XD; 7880} 7881 7882def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), 7883 "movntss\t{$src, $dst|$dst, $src}", 7884 [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS; 7885 7886def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), 7887 "movntsd\t{$src, $dst|$dst, $src}", 7888 [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD; 7889} 7890 7891//===----------------------------------------------------------------------===// 7892// AVX Instructions 7893//===----------------------------------------------------------------------===// 7894 7895//===----------------------------------------------------------------------===// 7896// VBROADCAST - Load from memory and broadcast to all elements of the 7897// destination operand 7898// 7899class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, 7900 X86MemOperand x86memop, Intrinsic Int, SchedWrite Sched> : 7901 AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 7902 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7903 [(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX; 7904 7905// AVX2 adds register forms 7906class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, 7907 Intrinsic Int, SchedWrite Sched> : 7908 AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), 7909 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7910 [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX; 7911 7912let ExeDomain = 
SSEPackedSingle in { 7913 def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, 7914 int_x86_avx_vbroadcast_ss, WriteLoad>; 7915 def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, 7916 int_x86_avx_vbroadcast_ss_256, 7917 WriteFShuffleLd>, VEX_L; 7918} 7919let ExeDomain = SSEPackedDouble in 7920def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, 7921 int_x86_avx_vbroadcast_sd_256, 7922 WriteFShuffleLd>, VEX_L; 7923def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, 7924 int_x86_avx_vbroadcastf128_pd_256, 7925 WriteFShuffleLd>, VEX_L; 7926 7927let ExeDomain = SSEPackedSingle in { 7928 def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, 7929 int_x86_avx2_vbroadcast_ss_ps, 7930 WriteFShuffle>; 7931 def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, 7932 int_x86_avx2_vbroadcast_ss_ps_256, 7933 WriteFShuffle256>, VEX_L; 7934} 7935let ExeDomain = SSEPackedDouble in 7936def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, 7937 int_x86_avx2_vbroadcast_sd_pd_256, 7938 WriteFShuffle256>, VEX_L; 7939 7940let Predicates = [HasAVX2] in 7941def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, 7942 int_x86_avx2_vbroadcasti128, WriteLoad>, 7943 VEX_L; 7944 7945let Predicates = [HasAVX] in 7946def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), 7947 (VBROADCASTF128 addr:$src)>; 7948 7949 7950//===----------------------------------------------------------------------===// 7951// VINSERTF128 - Insert packed floating-point values 7952// 7953let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { 7954def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), 7955 (ins VR256:$src1, VR128:$src2, i8imm:$src3), 7956 "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7957 []>, Sched<[WriteFShuffle]>, VEX_4V, VEX_L; 7958let mayLoad = 1 in 7959def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs 
VR256:$dst), 7960 (ins VR256:$src1, f128mem:$src2, i8imm:$src3), 7961 "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7962 []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; 7963} 7964 7965let Predicates = [HasAVX] in { 7966def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), 7967 (iPTR imm)), 7968 (VINSERTF128rr VR256:$src1, VR128:$src2, 7969 (INSERT_get_vinsert128_imm VR256:$ins))>; 7970def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), 7971 (iPTR imm)), 7972 (VINSERTF128rr VR256:$src1, VR128:$src2, 7973 (INSERT_get_vinsert128_imm VR256:$ins))>; 7974 7975def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), 7976 (iPTR imm)), 7977 (VINSERTF128rm VR256:$src1, addr:$src2, 7978 (INSERT_get_vinsert128_imm VR256:$ins))>; 7979def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), 7980 (iPTR imm)), 7981 (VINSERTF128rm VR256:$src1, addr:$src2, 7982 (INSERT_get_vinsert128_imm VR256:$ins))>; 7983} 7984 7985let Predicates = [HasAVX1Only] in { 7986def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), 7987 (iPTR imm)), 7988 (VINSERTF128rr VR256:$src1, VR128:$src2, 7989 (INSERT_get_vinsert128_imm VR256:$ins))>; 7990def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), 7991 (iPTR imm)), 7992 (VINSERTF128rr VR256:$src1, VR128:$src2, 7993 (INSERT_get_vinsert128_imm VR256:$ins))>; 7994def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), 7995 (iPTR imm)), 7996 (VINSERTF128rr VR256:$src1, VR128:$src2, 7997 (INSERT_get_vinsert128_imm VR256:$ins))>; 7998def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), 7999 (iPTR imm)), 8000 (VINSERTF128rr VR256:$src1, VR128:$src2, 8001 (INSERT_get_vinsert128_imm VR256:$ins))>; 8002 8003def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), 8004 (iPTR imm)), 8005 (VINSERTF128rm VR256:$src1, addr:$src2, 8006 
(INSERT_get_vinsert128_imm VR256:$ins))>; 8007def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), 8008 (bc_v4i32 (loadv2i64 addr:$src2)), 8009 (iPTR imm)), 8010 (VINSERTF128rm VR256:$src1, addr:$src2, 8011 (INSERT_get_vinsert128_imm VR256:$ins))>; 8012def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), 8013 (bc_v16i8 (loadv2i64 addr:$src2)), 8014 (iPTR imm)), 8015 (VINSERTF128rm VR256:$src1, addr:$src2, 8016 (INSERT_get_vinsert128_imm VR256:$ins))>; 8017def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), 8018 (bc_v8i16 (loadv2i64 addr:$src2)), 8019 (iPTR imm)), 8020 (VINSERTF128rm VR256:$src1, addr:$src2, 8021 (INSERT_get_vinsert128_imm VR256:$ins))>; 8022} 8023 8024//===----------------------------------------------------------------------===// 8025// VEXTRACTF128 - Extract packed floating-point values 8026// 8027let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { 8028def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), 8029 (ins VR256:$src1, i8imm:$src2), 8030 "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8031 []>, Sched<[WriteFShuffle]>, VEX, VEX_L; 8032let mayStore = 1 in 8033def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), 8034 (ins f128mem:$dst, VR256:$src1, i8imm:$src2), 8035 "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8036 []>, Sched<[WriteStore]>, VEX, VEX_L; 8037} 8038 8039// AVX1 patterns 8040let Predicates = [HasAVX] in { 8041def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8042 (v4f32 (VEXTRACTF128rr 8043 (v8f32 VR256:$src1), 8044 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8045def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8046 (v2f64 (VEXTRACTF128rr 8047 (v4f64 VR256:$src1), 8048 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8049 8050def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1), 8051 (iPTR imm))), addr:$dst), 8052 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8053 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8054def : Pat<(store (v2f64 
(vextract128_extract:$ext (v4f64 VR256:$src1), 8055 (iPTR imm))), addr:$dst), 8056 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8057 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8058} 8059 8060let Predicates = [HasAVX1Only] in { 8061def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8062 (v2i64 (VEXTRACTF128rr 8063 (v4i64 VR256:$src1), 8064 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8065def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8066 (v4i32 (VEXTRACTF128rr 8067 (v8i32 VR256:$src1), 8068 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8069def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8070 (v8i16 (VEXTRACTF128rr 8071 (v16i16 VR256:$src1), 8072 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8073def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8074 (v16i8 (VEXTRACTF128rr 8075 (v32i8 VR256:$src1), 8076 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8077 8078def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1), 8079 (iPTR imm))), addr:$dst), 8080 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8081 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8082def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1), 8083 (iPTR imm))), addr:$dst), 8084 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8085 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8086def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1), 8087 (iPTR imm))), addr:$dst), 8088 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8089 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8090def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1), 8091 (iPTR imm))), addr:$dst), 8092 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8093 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8094} 8095 8096//===----------------------------------------------------------------------===// 8097// VMASKMOV - Conditional SIMD Packed Loads and Stores 8098// 8099multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, 8100 Intrinsic 
IntLd, Intrinsic IntLd256, 8101 Intrinsic IntSt, Intrinsic IntSt256> { 8102 def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst), 8103 (ins VR128:$src1, f128mem:$src2), 8104 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8105 [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>, 8106 VEX_4V; 8107 def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst), 8108 (ins VR256:$src1, f256mem:$src2), 8109 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8110 [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, 8111 VEX_4V, VEX_L; 8112 def mr : AVX8I<opc_mr, MRMDestMem, (outs), 8113 (ins f128mem:$dst, VR128:$src1, VR128:$src2), 8114 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8115 [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; 8116 def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), 8117 (ins f256mem:$dst, VR256:$src1, VR256:$src2), 8118 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8119 [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; 8120} 8121 8122let ExeDomain = SSEPackedSingle in 8123defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", 8124 int_x86_avx_maskload_ps, 8125 int_x86_avx_maskload_ps_256, 8126 int_x86_avx_maskstore_ps, 8127 int_x86_avx_maskstore_ps_256>; 8128let ExeDomain = SSEPackedDouble in 8129defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", 8130 int_x86_avx_maskload_pd, 8131 int_x86_avx_maskload_pd_256, 8132 int_x86_avx_maskstore_pd, 8133 int_x86_avx_maskstore_pd_256>; 8134 8135//===----------------------------------------------------------------------===// 8136// VPERMIL - Permute Single and Double Floating-Point Values 8137// 8138multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, 8139 RegisterClass RC, X86MemOperand x86memop_f, 8140 X86MemOperand x86memop_i, PatFrag i_frag, 8141 Intrinsic IntVar, ValueType vt> { 8142 def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), 8143 (ins RC:$src1, RC:$src2), 8144 
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8145 [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V, 8146 Sched<[WriteFShuffle]>; 8147 def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), 8148 (ins RC:$src1, x86memop_i:$src2), 8149 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8150 [(set RC:$dst, (IntVar RC:$src1, 8151 (bitconvert (i_frag addr:$src2))))]>, VEX_4V, 8152 Sched<[WriteFShuffleLd, ReadAfterLd]>; 8153 8154 def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), 8155 (ins RC:$src1, i8imm:$src2), 8156 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8157 [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX, 8158 Sched<[WriteFShuffle]>; 8159 def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), 8160 (ins x86memop_f:$src1, i8imm:$src2), 8161 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8162 [(set RC:$dst, 8163 (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX, 8164 Sched<[WriteFShuffleLd]>; 8165} 8166 8167let ExeDomain = SSEPackedSingle in { 8168 defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, 8169 loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>; 8170 defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, 8171 loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L; 8172} 8173let ExeDomain = SSEPackedDouble in { 8174 defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, 8175 loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>; 8176 defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, 8177 loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L; 8178} 8179 8180let Predicates = [HasAVX] in { 8181def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), 8182 (VPERMILPSYri VR256:$src1, imm:$imm)>; 8183def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), 8184 (VPERMILPDYri VR256:$src1, imm:$imm)>; 8185def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (loadv4i64 
addr:$src1)), 8186 (i8 imm:$imm))), 8187 (VPERMILPSYmi addr:$src1, imm:$imm)>; 8188def : Pat<(v4i64 (X86VPermilp (loadv4i64 addr:$src1), (i8 imm:$imm))), 8189 (VPERMILPDYmi addr:$src1, imm:$imm)>; 8190 8191def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))), 8192 (VPERMILPDri VR128:$src1, imm:$imm)>; 8193def : Pat<(v2i64 (X86VPermilp (loadv2i64 addr:$src1), (i8 imm:$imm))), 8194 (VPERMILPDmi addr:$src1, imm:$imm)>; 8195} 8196 8197//===----------------------------------------------------------------------===// 8198// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks 8199// 8200let ExeDomain = SSEPackedSingle in { 8201def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), 8202 (ins VR256:$src1, VR256:$src2, i8imm:$src3), 8203 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8204 [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, 8205 (i8 imm:$src3))))]>, VEX_4V, VEX_L, 8206 Sched<[WriteFShuffle]>; 8207def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), 8208 (ins VR256:$src1, f256mem:$src2, i8imm:$src3), 8209 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8210 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2), 8211 (i8 imm:$src3)))]>, VEX_4V, VEX_L, 8212 Sched<[WriteFShuffleLd, ReadAfterLd]>; 8213} 8214 8215let Predicates = [HasAVX] in { 8216def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8217 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8218def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, 8219 (loadv4f64 addr:$src2), (i8 imm:$imm))), 8220 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8221} 8222 8223let Predicates = [HasAVX1Only] in { 8224def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8225 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8226def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8227 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8228def : Pat<(v32i8 
(X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8229 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8230def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8231 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8232 8233def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, 8234 (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 8235 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8236def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, 8237 (loadv4i64 addr:$src2), (i8 imm:$imm))), 8238 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8239def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, 8240 (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 8241 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8242def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, 8243 (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 8244 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8245} 8246 8247//===----------------------------------------------------------------------===// 8248// VZERO - Zero YMM registers 8249// 8250let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, 8251 YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { 8252 // Zero All YMM registers 8253 def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", 8254 [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>; 8255 8256 // Zero Upper bits of YMM registers 8257 def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", 8258 [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>; 8259} 8260 8261//===----------------------------------------------------------------------===// 8262// Half precision conversion instructions 8263//===----------------------------------------------------------------------===// 8264multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { 8265 def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), 8266 "vcvtph2ps\t{$src, $dst|$dst, $src}", 8267 [(set RC:$dst, (Int VR128:$src))]>, 8268 T8PD, VEX, Sched<[WriteCvtF2F]>; 
  // Memory form of VCVTPH2PS (half -> single conversion load).  No modeled
  // side effects; folds the load into the conversion.
  let neverHasSideEffects = 1, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX,
             Sched<[WriteCvtF2FLd]>;
}

// VCVTPS2PH: convert packed single-precision to packed half-precision.
// The register form selects rounding via the $src2 immediate; the memory
// form stores the converted halves directly.
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32i8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
               TAPD, VEX, Sched<[WriteCvtF2F]>;
  // NOTE(review): the store form reuses WriteCvtF2FLd + WriteRMW as its
  // scheduling classes — confirm this is the intended model for a store.
  let neverHasSideEffects = 1, mayStore = 1,
      SchedRW = [WriteCvtF2FLd, WriteRMW] in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32i8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX;
}

// Instantiate the 128-bit and 256-bit F16C conversions.
let Predicates = [HasF16C] in {
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate.
/// Provides reg/reg/imm and reg/mem/imm forms; only the reg/reg form is
/// marked commutable.
multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId, RegisterClass RC,
                              PatFrag memop_frag, X86MemOperand x86memop> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u32u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
        Sched<[WriteBlend]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst,
          (IntId RC:$src1,
           (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
        Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}

// VPBLENDD selects dwords by immediate mask; the mask makes the operands
// non-interchangeable, hence isCommutable = 0 here.
let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
                                   VR128, loadv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
                                    VR256, loadv4i64, i256mem>, VEX_L;
}

// Lower the generic X86Blendi node on i32 vectors to VPBLENDD.
def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
                            imm:$mask)),
          (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
                            imm:$mask)),
          (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;

//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
// Each instantiation yields four defs: rr/rm (128-bit dest) and Yrr/Yrm
// (256-bit dest); the memory forms broadcast a single scalar load.
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          Intrinsic Int128, Intrinsic Int256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst, (Int128 VR128:$src))]>,
                  Sched<[WriteShuffle]>, VEX;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                    (Int128 (scalar_to_vector (ld_frag addr:$src))))]>,
                  Sched<[WriteLoad]>, VEX;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst, (Int256 VR128:$src))]>,
                   Sched<[WriteShuffle256]>, VEX, VEX_L;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst,
                     (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
                   Sched<[WriteLoad]>, VEX, VEX_L;
}

defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
                                   int_x86_avx2_pbroadcastb_128,
                                   int_x86_avx2_pbroadcastb_256>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
                                   int_x86_avx2_pbroadcastw_128,
                                   int_x86_avx2_pbroadcastw_256>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
                                   int_x86_avx2_pbroadcastd_128,
                                   int_x86_avx2_pbroadcastd_256>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
                                   int_x86_avx2_pbroadcastq_128,
                                   int_x86_avx2_pbroadcastq_256>;

let Predicates = [HasAVX2] in {
  // X86VBroadcast of a scalar load -> broadcast-from-memory forms.
  def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
            (VPBROADCASTBrm addr:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
            (VPBROADCASTBYrm addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWYrm addr:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDrm addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDYrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
            (VPBROADCASTQrm addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
            (VPBROADCASTQYrm addr:$src)>;

  // X86VBroadcast of element 0 of a vector register -> register forms.
  def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
            (VPBROADCASTBrr VR128:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
            (VPBROADCASTBYrr VR128:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
            (VPBROADCASTWrr VR128:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
            (VPBROADCASTWYrr VR128:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
            (VPBROADCASTDrr VR128:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
            (VPBROADCASTDYrr VR128:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
            (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
            (VPBROADCASTQYrr VR128:$src)>;
  def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
            (VBROADCASTSSrr VR128:$src)>;
  def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
            (VBROADCASTSSYrr VR128:$src)>;
  // v2f64 is handled via the integer VPBROADCASTQrr; the 256-bit f64 case
  // uses VBROADCASTSDYrr.
  def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
            (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
            (VBROADCASTSDYrr VR128:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  let AddedComplexity = 20 in {
    def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;

    def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;

    // GR8/GR16 sources are first widened to i32 (SUBREG_TO_REG) so they can
    // be moved into a VR128 before broadcasting.
    def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
              (VPBROADCASTBrr (COPY_TO_REGCLASS
                               (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                               VR128))>;
    def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
              (VPBROADCASTBYrr (COPY_TO_REGCLASS
                                (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                                VR128))>;

    def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
              (VPBROADCASTWrr (COPY_TO_REGCLASS
                               (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                               VR128))>;
    def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
              (VPBROADCASTWYrr (COPY_TO_REGCLASS
                                (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                                VR128))>;

    // The patterns for VPBROADCASTD are not needed because they would match
    // the exact same thing as VBROADCASTSS patterns.

    def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
              (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
    // The v4i64 pattern is not needed because VBROADCASTSDYrr already match.
  }
}

// AVX1 broadcast patterns: without AVX2, integer broadcast-from-memory is
// emulated with the FP VBROADCASTSS/SD memory forms.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSrm addr:$src)>;
}

let Predicates = [HasAVX] in {
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSrm addr:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  let AddedComplexity = 20 in {
  // 128bit broadcasts: built from VPSHUFD splats; the 256-bit cases insert
  // the splatted 128-bit half into both lanes with VINSERTF128.
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;

  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
  }
}

//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

// Cross-lane permute with a variable control vector ($src2).
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                     ValueType OpVT> {
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
                   Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1,
                            (bitconvert (mem_frag addr:$src2)))))]>,
                   Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}

defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32>;

// Cross-lane permute with an immediate control byte.
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT> {
  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
                     Sched<[WriteShuffle256]>, VEX, VEX_L;
  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins i256mem:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi (mem_frag addr:$src1),
                              (i8 imm:$src2))))]>,
                     Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX, VEX_L;
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64>, VEX_W;

//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
//
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                            (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
          VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
                             (i8 imm:$src3)))]>,
          Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;

// Extend the v4i64-typed defs above to the other 256-bit integer types.
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;

def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
                   (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
}


//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
let neverHasSideEffects = 1 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}

// Match subvector inserts; INSERT_get_vinsert128_imm converts the insertion
// index captured by vinsert128_insert into the instruction immediate.
let Predicates = [HasAVX2] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;

def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
                                  (bc_v16i8 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
                                  (bc_v8i16 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128:$dst,
            (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
          Sched<[WriteShuffle256]>, VEX, VEX_L;
let neverHasSideEffects = 1, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          Sched<[WriteStore]>, VEX, VEX_L;

// Match subvector extracts; EXTRACT_get_vextract128_imm converts the
// extraction index into the instruction immediate.
let Predicates = [HasAVX2] in {
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 VR256:$src1),
                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 VR256:$src1),
                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 VR256:$src1),
                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 VR256:$src1),
                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;

def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
// $src1 supplies the per-element mask for the loads; for the stores the
// mask is $src1 and the data is $src2.
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256> {
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L;
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256>, VEX_W;


//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
// Per-element shift amounts come from the second vector operand.
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
             VEX_4V, Sched<[WriteVarVecShift]>;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1,
                       (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
             VEX_4V, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
             VEX_4V, VEX_L, Sched<[WriteVarVecShift]>;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1,
                       (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
             VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
}

defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;

//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
// Gathers also write back the mask ($mask_wb); selection is done manually,
// so the defs carry no patterns.
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256> {
  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb),
            (ins VR128:$src1, memop128:$src2, VR128:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX_4VOp3;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb),
            (ins RC256:$src1, memop256:$src2, RC256:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX_4VOp3, VEX_L;
}

// The destination and written-back mask must not alias the index/memory
// operands (earlyclobber), and are tied to the input operands.
let mayLoad = 1, Constraints
  = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
  in {
  defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W;
  defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W;
  defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>;
  defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>;

  let ExeDomain = SSEPackedDouble in {
    defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
    defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
  }

  let ExeDomain = SSEPackedSingle in {
    defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
    defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
  }
}