assembler-x64.h revision c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7a
// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license above has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

// A lightweight X64 Assembler.
36 37#ifndef V8_X64_ASSEMBLER_X64_H_ 38#define V8_X64_ASSEMBLER_X64_H_ 39 40#include <deque> 41 42#include "src/assembler.h" 43#include "src/x64/sse-instr.h" 44 45namespace v8 { 46namespace internal { 47 48// Utility functions 49 50#define GENERAL_REGISTERS(V) \ 51 V(rax) \ 52 V(rcx) \ 53 V(rdx) \ 54 V(rbx) \ 55 V(rsp) \ 56 V(rbp) \ 57 V(rsi) \ 58 V(rdi) \ 59 V(r8) \ 60 V(r9) \ 61 V(r10) \ 62 V(r11) \ 63 V(r12) \ 64 V(r13) \ 65 V(r14) \ 66 V(r15) 67 68#define ALLOCATABLE_GENERAL_REGISTERS(V) \ 69 V(rax) \ 70 V(rbx) \ 71 V(rdx) \ 72 V(rcx) \ 73 V(rsi) \ 74 V(rdi) \ 75 V(r8) \ 76 V(r9) \ 77 V(r11) \ 78 V(r12) \ 79 V(r14) \ 80 V(r15) 81 82 83// CPU Registers. 84// 85// 1) We would prefer to use an enum, but enum values are assignment- 86// compatible with int, which has caused code-generation bugs. 87// 88// 2) We would prefer to use a class instead of a struct but we don't like 89// the register initialization to depend on the particular initialization 90// order (which appears to be different on OS X, Linux, and Windows for the 91// installed versions of C++ we tried). Using a struct permits C-style 92// "initialization". Also, the Register objects cannot be const as this 93// forces initialization stubs in MSVC, making us dependent on initialization 94// order. 95// 96// 3) By not using an enum, we are possibly preventing the compiler from 97// doing certain constant folds, which may significantly reduce the 98// code generated for some assembly instructions (because they boil down 99// to a few constants). If this is a problem, we could change the code 100// such that we use an enum in optimized mode, and the struct in debug 101// mode. This way we get the compile-time error checking in debug mode 102// and best performance in optimized code. 
103// 104struct Register { 105 enum Code { 106#define REGISTER_CODE(R) kCode_##R, 107 GENERAL_REGISTERS(REGISTER_CODE) 108#undef REGISTER_CODE 109 kAfterLast, 110 kCode_no_reg = -1 111 }; 112 113 static const int kNumRegisters = Code::kAfterLast; 114 115 static Register from_code(int code) { 116 DCHECK(code >= 0); 117 DCHECK(code < kNumRegisters); 118 Register r = {code}; 119 return r; 120 } 121 bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; } 122 bool is(Register reg) const { return reg_code == reg.reg_code; } 123 int code() const { 124 DCHECK(is_valid()); 125 return reg_code; 126 } 127 int bit() const { 128 DCHECK(is_valid()); 129 return 1 << reg_code; 130 } 131 132 bool is_byte_register() const { return reg_code <= 3; } 133 // Return the high bit of the register code as a 0 or 1. Used often 134 // when constructing the REX prefix byte. 135 int high_bit() const { return reg_code >> 3; } 136 // Return the 3 low bits of the register code. Used when encoding registers 137 // in modR/M, SIB, and opcode bytes. 138 int low_bits() const { return reg_code & 0x7; } 139 140 // Unfortunately we can't make this private in a struct when initializing 141 // by assignment. 
142 int reg_code; 143}; 144 145 146#define DECLARE_REGISTER(R) const Register R = {Register::kCode_##R}; 147GENERAL_REGISTERS(DECLARE_REGISTER) 148#undef DECLARE_REGISTER 149const Register no_reg = {Register::kCode_no_reg}; 150 151 152#ifdef _WIN64 153 // Windows calling convention 154const Register arg_reg_1 = {Register::kCode_rcx}; 155const Register arg_reg_2 = {Register::kCode_rdx}; 156const Register arg_reg_3 = {Register::kCode_r8}; 157const Register arg_reg_4 = {Register::kCode_r9}; 158#else 159 // AMD64 calling convention 160const Register arg_reg_1 = {Register::kCode_rdi}; 161const Register arg_reg_2 = {Register::kCode_rsi}; 162const Register arg_reg_3 = {Register::kCode_rdx}; 163const Register arg_reg_4 = {Register::kCode_rcx}; 164#endif // _WIN64 165 166 167#define DOUBLE_REGISTERS(V) \ 168 V(xmm0) \ 169 V(xmm1) \ 170 V(xmm2) \ 171 V(xmm3) \ 172 V(xmm4) \ 173 V(xmm5) \ 174 V(xmm6) \ 175 V(xmm7) \ 176 V(xmm8) \ 177 V(xmm9) \ 178 V(xmm10) \ 179 V(xmm11) \ 180 V(xmm12) \ 181 V(xmm13) \ 182 V(xmm14) \ 183 V(xmm15) 184 185#define FLOAT_REGISTERS DOUBLE_REGISTERS 186#define SIMD128_REGISTERS DOUBLE_REGISTERS 187 188#define ALLOCATABLE_DOUBLE_REGISTERS(V) \ 189 V(xmm0) \ 190 V(xmm1) \ 191 V(xmm2) \ 192 V(xmm3) \ 193 V(xmm4) \ 194 V(xmm5) \ 195 V(xmm6) \ 196 V(xmm7) \ 197 V(xmm8) \ 198 V(xmm9) \ 199 V(xmm10) \ 200 V(xmm11) \ 201 V(xmm12) \ 202 V(xmm13) \ 203 V(xmm14) 204 205static const bool kSimpleFPAliasing = true; 206 207struct XMMRegister { 208 enum Code { 209#define REGISTER_CODE(R) kCode_##R, 210 DOUBLE_REGISTERS(REGISTER_CODE) 211#undef REGISTER_CODE 212 kAfterLast, 213 kCode_no_reg = -1 214 }; 215 216 static const int kMaxNumRegisters = Code::kAfterLast; 217 218 static XMMRegister from_code(int code) { 219 XMMRegister result = {code}; 220 return result; 221 } 222 223 bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; } 224 bool is(XMMRegister reg) const { return reg_code == reg.reg_code; } 225 int code() const { 226 
DCHECK(is_valid()); 227 return reg_code; 228 } 229 230 // Return the high bit of the register code as a 0 or 1. Used often 231 // when constructing the REX prefix byte. 232 int high_bit() const { return reg_code >> 3; } 233 // Return the 3 low bits of the register code. Used when encoding registers 234 // in modR/M, SIB, and opcode bytes. 235 int low_bits() const { return reg_code & 0x7; } 236 237 // Unfortunately we can't make this private in a struct when initializing 238 // by assignment. 239 int reg_code; 240}; 241 242typedef XMMRegister FloatRegister; 243 244typedef XMMRegister DoubleRegister; 245 246typedef XMMRegister Simd128Register; 247 248#define DECLARE_REGISTER(R) \ 249 const DoubleRegister R = {DoubleRegister::kCode_##R}; 250DOUBLE_REGISTERS(DECLARE_REGISTER) 251#undef DECLARE_REGISTER 252const DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg}; 253 254enum Condition { 255 // any value < 0 is considered no_condition 256 no_condition = -1, 257 258 overflow = 0, 259 no_overflow = 1, 260 below = 2, 261 above_equal = 3, 262 equal = 4, 263 not_equal = 5, 264 below_equal = 6, 265 above = 7, 266 negative = 8, 267 positive = 9, 268 parity_even = 10, 269 parity_odd = 11, 270 less = 12, 271 greater_equal = 13, 272 less_equal = 14, 273 greater = 15, 274 275 // Fake conditions that are handled by the 276 // opcodes using them. 277 always = 16, 278 never = 17, 279 // aliases 280 carry = below, 281 not_carry = above_equal, 282 zero = equal, 283 not_zero = not_equal, 284 sign = negative, 285 not_sign = positive, 286 last_condition = greater 287}; 288 289 290// Returns the equivalent of !cc. 291// Negation of the default no_condition (-1) results in a non-default 292// no_condition value (-2). As long as tests for no_condition check 293// for condition < 0, this will work as expected. 294inline Condition NegateCondition(Condition cc) { 295 return static_cast<Condition>(cc ^ 1); 296} 297 298 299// Commute a condition such that {a cond b == b cond' a}. 
300inline Condition CommuteCondition(Condition cc) { 301 switch (cc) { 302 case below: 303 return above; 304 case above: 305 return below; 306 case above_equal: 307 return below_equal; 308 case below_equal: 309 return above_equal; 310 case less: 311 return greater; 312 case greater: 313 return less; 314 case greater_equal: 315 return less_equal; 316 case less_equal: 317 return greater_equal; 318 default: 319 return cc; 320 } 321} 322 323 324enum RoundingMode { 325 kRoundToNearest = 0x0, 326 kRoundDown = 0x1, 327 kRoundUp = 0x2, 328 kRoundToZero = 0x3 329}; 330 331 332// ----------------------------------------------------------------------------- 333// Machine instruction Immediates 334 335class Immediate BASE_EMBEDDED { 336 public: 337 explicit Immediate(int32_t value) : value_(value) {} 338 explicit Immediate(int32_t value, RelocInfo::Mode rmode) 339 : value_(value), rmode_(rmode) {} 340 explicit Immediate(Smi* value) { 341 DCHECK(SmiValuesAre31Bits()); // Only available for 31-bit SMI. 342 value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value)); 343 } 344 345 private: 346 int32_t value_; 347 RelocInfo::Mode rmode_ = RelocInfo::NONE32; 348 349 friend class Assembler; 350}; 351 352 353// ----------------------------------------------------------------------------- 354// Machine instruction Operands 355 356enum ScaleFactor { 357 times_1 = 0, 358 times_2 = 1, 359 times_4 = 2, 360 times_8 = 3, 361 times_int_size = times_4, 362 times_pointer_size = (kPointerSize == 8) ? times_8 : times_4 363}; 364 365 366class Operand BASE_EMBEDDED { 367 public: 368 // [base + disp/r] 369 Operand(Register base, int32_t disp); 370 371 // [base + index*scale + disp/r] 372 Operand(Register base, 373 Register index, 374 ScaleFactor scale, 375 int32_t disp); 376 377 // [index*scale + disp/r] 378 Operand(Register index, 379 ScaleFactor scale, 380 int32_t disp); 381 382 // Offset from existing memory operand. 
383 // Offset is added to existing displacement as 32-bit signed values and 384 // this must not overflow. 385 Operand(const Operand& base, int32_t offset); 386 387 // [rip + disp/r] 388 explicit Operand(Label* label); 389 390 // Checks whether either base or index register is the given register. 391 // Does not check the "reg" part of the Operand. 392 bool AddressUsesRegister(Register reg) const; 393 394 // Queries related to the size of the generated instruction. 395 // Whether the generated instruction will have a REX prefix. 396 bool requires_rex() const { return rex_ != 0; } 397 // Size of the ModR/M, SIB and displacement parts of the generated 398 // instruction. 399 int operand_size() const { return len_; } 400 401 private: 402 byte rex_; 403 byte buf_[9]; 404 // The number of bytes of buf_ in use. 405 byte len_; 406 407 // Set the ModR/M byte without an encoded 'reg' register. The 408 // register is encoded later as part of the emit_operand operation. 409 // set_modrm can be called before or after set_sib and set_disp*. 410 inline void set_modrm(int mod, Register rm); 411 412 // Set the SIB byte if one is needed. Sets the length to 2 rather than 1. 413 inline void set_sib(ScaleFactor scale, Register index, Register base); 414 415 // Adds operand displacement fields (offsets added to the memory address). 416 // Needs to be called after set_sib, not before it. 417 inline void set_disp8(int disp); 418 inline void set_disp32(int disp); 419 inline void set_disp64(int64_t disp); // for labels. 
420 421 friend class Assembler; 422}; 423 424#define ASSEMBLER_INSTRUCTION_LIST(V) \ 425 V(add) \ 426 V(and) \ 427 V(cmp) \ 428 V(cmpxchg) \ 429 V(dec) \ 430 V(idiv) \ 431 V(div) \ 432 V(imul) \ 433 V(inc) \ 434 V(lea) \ 435 V(mov) \ 436 V(movzxb) \ 437 V(movzxw) \ 438 V(neg) \ 439 V(not) \ 440 V(or) \ 441 V(repmovs) \ 442 V(sbb) \ 443 V(sub) \ 444 V(test) \ 445 V(xchg) \ 446 V(xor) 447 448// Shift instructions on operands/registers with kPointerSize, kInt32Size and 449// kInt64Size. 450#define SHIFT_INSTRUCTION_LIST(V) \ 451 V(rol, 0x0) \ 452 V(ror, 0x1) \ 453 V(rcl, 0x2) \ 454 V(rcr, 0x3) \ 455 V(shl, 0x4) \ 456 V(shr, 0x5) \ 457 V(sar, 0x7) \ 458 459 460class Assembler : public AssemblerBase { 461 private: 462 // We check before assembling an instruction that there is sufficient 463 // space to write an instruction and its relocation information. 464 // The relocation writer's position must be kGap bytes above the end of 465 // the generated instructions. This leaves enough space for the 466 // longest possible x64 instruction, 15 bytes, and the longest possible 467 // relocation information encoding, RelocInfoWriter::kMaxLength == 16. 468 // (There is a 15 byte limit on x64 instruction length that rules out some 469 // otherwise valid instructions.) 470 // This allows for a single, fast space check per instruction. 471 static const int kGap = 32; 472 473 public: 474 // Create an assembler. Instructions and relocation information are emitted 475 // into a buffer, with the instructions starting from the beginning and the 476 // relocation information starting from the end of the buffer. See CodeDesc 477 // for a detailed comment on the layout (globals.h). 478 // 479 // If the provided buffer is NULL, the assembler allocates and grows its own 480 // buffer, and buffer_size determines the initial buffer size. The buffer is 481 // owned by the assembler and deallocated upon destruction of the assembler. 
482 // 483 // If the provided buffer is not NULL, the assembler uses the provided buffer 484 // for code generation and assumes its size to be buffer_size. If the buffer 485 // is too small, a fatal error occurs. No deallocation of the buffer is done 486 // upon destruction of the assembler. 487 Assembler(Isolate* isolate, void* buffer, int buffer_size); 488 virtual ~Assembler() { } 489 490 // GetCode emits any pending (non-emitted) code and fills the descriptor 491 // desc. GetCode() is idempotent; it returns the same result if no other 492 // Assembler functions are invoked in between GetCode() calls. 493 void GetCode(CodeDesc* desc); 494 495 // Read/Modify the code target in the relative branch/call instruction at pc. 496 // On the x64 architecture, we use relative jumps with a 32-bit displacement 497 // to jump to other Code objects in the Code space in the heap. 498 // Jumps to C functions are done indirectly through a 64-bit register holding 499 // the absolute address of the target. 500 // These functions convert between absolute Addresses of Code objects and 501 // the relative displacements stored in the code. 502 static inline Address target_address_at(Address pc, Address constant_pool); 503 static inline void set_target_address_at( 504 Isolate* isolate, Address pc, Address constant_pool, Address target, 505 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 506 static inline Address target_address_at(Address pc, Code* code) { 507 Address constant_pool = code ? code->constant_pool() : NULL; 508 return target_address_at(pc, constant_pool); 509 } 510 static inline void set_target_address_at( 511 Isolate* isolate, Address pc, Code* code, Address target, 512 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) { 513 Address constant_pool = code ? 
code->constant_pool() : NULL; 514 set_target_address_at(isolate, pc, constant_pool, target, 515 icache_flush_mode); 516 } 517 518 // Return the code target address at a call site from the return address 519 // of that call in the instruction stream. 520 static inline Address target_address_from_return_address(Address pc); 521 522 // This sets the branch destination (which is in the instruction on x64). 523 // This is for calls and branches within generated code. 524 inline static void deserialization_set_special_target_at( 525 Isolate* isolate, Address instruction_payload, Code* code, 526 Address target) { 527 set_target_address_at(isolate, instruction_payload, code, target); 528 } 529 530 // This sets the internal reference at the pc. 531 inline static void deserialization_set_target_internal_reference_at( 532 Isolate* isolate, Address pc, Address target, 533 RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); 534 535 static inline RelocInfo::Mode RelocInfoNone() { 536 if (kPointerSize == kInt64Size) { 537 return RelocInfo::NONE64; 538 } else { 539 DCHECK(kPointerSize == kInt32Size); 540 return RelocInfo::NONE32; 541 } 542 } 543 544 inline Handle<Object> code_target_object_handle_at(Address pc); 545 inline Address runtime_entry_at(Address pc); 546 // Number of bytes taken up by the branch target in the code. 547 static const int kSpecialTargetSize = 4; // Use 32-bit displacement. 548 // Distance between the address of the code target in the call instruction 549 // and the return address pushed on the stack. 550 static const int kCallTargetAddressOffset = 4; // Use 32-bit displacement. 551 // The length of call(kScratchRegister). 552 static const int kCallScratchRegisterInstructionLength = 3; 553 // The length of call(Immediate32). 554 static const int kShortCallInstructionLength = 5; 555 // The length of movq(kScratchRegister, address). 
556 static const int kMoveAddressIntoScratchRegisterInstructionLength = 557 2 + kPointerSize; 558 // The length of movq(kScratchRegister, address) and call(kScratchRegister). 559 static const int kCallSequenceLength = 560 kMoveAddressIntoScratchRegisterInstructionLength + 561 kCallScratchRegisterInstructionLength; 562 563 // The debug break slot must be able to contain an indirect call sequence. 564 static const int kDebugBreakSlotLength = kCallSequenceLength; 565 // Distance between start of patched debug break slot and the emitted address 566 // to jump to. 567 static const int kPatchDebugBreakSlotAddressOffset = 568 kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize; 569 570 // One byte opcode for test eax,0xXXXXXXXX. 571 static const byte kTestEaxByte = 0xA9; 572 // One byte opcode for test al, 0xXX. 573 static const byte kTestAlByte = 0xA8; 574 // One byte opcode for nop. 575 static const byte kNopByte = 0x90; 576 577 // One byte prefix for a short conditional jump. 578 static const byte kJccShortPrefix = 0x70; 579 static const byte kJncShortOpcode = kJccShortPrefix | not_carry; 580 static const byte kJcShortOpcode = kJccShortPrefix | carry; 581 static const byte kJnzShortOpcode = kJccShortPrefix | not_zero; 582 static const byte kJzShortOpcode = kJccShortPrefix | zero; 583 584 // VEX prefix encodings. 585 enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 }; 586 enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 }; 587 enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 }; 588 enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 }; 589 590 // --------------------------------------------------------------------------- 591 // Code generation 592 // 593 // Function names correspond one-to-one to x64 instruction mnemonics. 594 // Unless specified otherwise, instructions operate on 64-bit operands. 
595 // 596 // If we need versions of an assembly instruction that operate on different 597 // width arguments, we add a single-letter suffix specifying the width. 598 // This is done for the following instructions: mov, cmp, inc, dec, 599 // add, sub, and test. 600 // There are no versions of these instructions without the suffix. 601 // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'. 602 // - Instructions on 16-bit (word) operands/registers have a trailing 'w'. 603 // - Instructions on 32-bit (doubleword) operands/registers use 'l'. 604 // - Instructions on 64-bit (quadword) operands/registers use 'q'. 605 // - Instructions on operands/registers with pointer size use 'p'. 606 607 STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size); 608 609#define DECLARE_INSTRUCTION(instruction) \ 610 template<class P1> \ 611 void instruction##p(P1 p1) { \ 612 emit_##instruction(p1, kPointerSize); \ 613 } \ 614 \ 615 template<class P1> \ 616 void instruction##l(P1 p1) { \ 617 emit_##instruction(p1, kInt32Size); \ 618 } \ 619 \ 620 template<class P1> \ 621 void instruction##q(P1 p1) { \ 622 emit_##instruction(p1, kInt64Size); \ 623 } \ 624 \ 625 template<class P1, class P2> \ 626 void instruction##p(P1 p1, P2 p2) { \ 627 emit_##instruction(p1, p2, kPointerSize); \ 628 } \ 629 \ 630 template<class P1, class P2> \ 631 void instruction##l(P1 p1, P2 p2) { \ 632 emit_##instruction(p1, p2, kInt32Size); \ 633 } \ 634 \ 635 template<class P1, class P2> \ 636 void instruction##q(P1 p1, P2 p2) { \ 637 emit_##instruction(p1, p2, kInt64Size); \ 638 } \ 639 \ 640 template<class P1, class P2, class P3> \ 641 void instruction##p(P1 p1, P2 p2, P3 p3) { \ 642 emit_##instruction(p1, p2, p3, kPointerSize); \ 643 } \ 644 \ 645 template<class P1, class P2, class P3> \ 646 void instruction##l(P1 p1, P2 p2, P3 p3) { \ 647 emit_##instruction(p1, p2, p3, kInt32Size); \ 648 } \ 649 \ 650 template<class P1, class P2, class P3> \ 651 void instruction##q(P1 p1, P2 p2, 
P3 p3) { \ 652 emit_##instruction(p1, p2, p3, kInt64Size); \ 653 } 654 ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION) 655#undef DECLARE_INSTRUCTION 656 657 // Insert the smallest number of nop instructions 658 // possible to align the pc offset to a multiple 659 // of m, where m must be a power of 2. 660 void Align(int m); 661 // Insert the smallest number of zero bytes possible to align the pc offset 662 // to a mulitple of m. m must be a power of 2 (>= 2). 663 void DataAlign(int m); 664 void Nop(int bytes = 1); 665 // Aligns code to something that's optimal for a jump target for the platform. 666 void CodeTargetAlign(); 667 668 // Stack 669 void pushfq(); 670 void popfq(); 671 672 void pushq(Immediate value); 673 // Push a 32 bit integer, and guarantee that it is actually pushed as a 674 // 32 bit value, the normal push will optimize the 8 bit case. 675 void pushq_imm32(int32_t imm32); 676 void pushq(Register src); 677 void pushq(const Operand& src); 678 679 void popq(Register dst); 680 void popq(const Operand& dst); 681 682 void enter(Immediate size); 683 void leave(); 684 685 // Moves 686 void movb(Register dst, const Operand& src); 687 void movb(Register dst, Immediate imm); 688 void movb(const Operand& dst, Register src); 689 void movb(const Operand& dst, Immediate imm); 690 691 // Move the low 16 bits of a 64-bit register value to a 16-bit 692 // memory location. 693 void movw(Register dst, const Operand& src); 694 void movw(const Operand& dst, Register src); 695 void movw(const Operand& dst, Immediate imm); 696 697 // Move the offset of the label location relative to the current 698 // position (after the move) to the destination. 699 void movl(const Operand& dst, Label* src); 700 701 // Loads a pointer into a register with a relocation mode. 702 void movp(Register dst, void* ptr, RelocInfo::Mode rmode); 703 704 // Loads a 64-bit immediate into a register. 
705 void movq(Register dst, int64_t value, 706 RelocInfo::Mode rmode = RelocInfo::NONE64); 707 void movq(Register dst, uint64_t value, 708 RelocInfo::Mode rmode = RelocInfo::NONE64); 709 710 void movsxbl(Register dst, Register src); 711 void movsxbl(Register dst, const Operand& src); 712 void movsxbq(Register dst, Register src); 713 void movsxbq(Register dst, const Operand& src); 714 void movsxwl(Register dst, Register src); 715 void movsxwl(Register dst, const Operand& src); 716 void movsxwq(Register dst, Register src); 717 void movsxwq(Register dst, const Operand& src); 718 void movsxlq(Register dst, Register src); 719 void movsxlq(Register dst, const Operand& src); 720 721 // Repeated moves. 722 723 void repmovsb(); 724 void repmovsw(); 725 void repmovsp() { emit_repmovs(kPointerSize); } 726 void repmovsl() { emit_repmovs(kInt32Size); } 727 void repmovsq() { emit_repmovs(kInt64Size); } 728 729 // Instruction to load from an immediate 64-bit pointer into RAX. 730 void load_rax(void* ptr, RelocInfo::Mode rmode); 731 void load_rax(ExternalReference ext); 732 733 // Conditional moves. 
734 void cmovq(Condition cc, Register dst, Register src); 735 void cmovq(Condition cc, Register dst, const Operand& src); 736 void cmovl(Condition cc, Register dst, Register src); 737 void cmovl(Condition cc, Register dst, const Operand& src); 738 739 void cmpb(Register dst, Immediate src) { 740 immediate_arithmetic_op_8(0x7, dst, src); 741 } 742 743 void cmpb_al(Immediate src); 744 745 void cmpb(Register dst, Register src) { 746 arithmetic_op_8(0x3A, dst, src); 747 } 748 749 void cmpb(Register dst, const Operand& src) { 750 arithmetic_op_8(0x3A, dst, src); 751 } 752 753 void cmpb(const Operand& dst, Register src) { 754 arithmetic_op_8(0x38, src, dst); 755 } 756 757 void cmpb(const Operand& dst, Immediate src) { 758 immediate_arithmetic_op_8(0x7, dst, src); 759 } 760 761 void cmpw(const Operand& dst, Immediate src) { 762 immediate_arithmetic_op_16(0x7, dst, src); 763 } 764 765 void cmpw(Register dst, Immediate src) { 766 immediate_arithmetic_op_16(0x7, dst, src); 767 } 768 769 void cmpw(Register dst, const Operand& src) { 770 arithmetic_op_16(0x3B, dst, src); 771 } 772 773 void cmpw(Register dst, Register src) { 774 arithmetic_op_16(0x3B, dst, src); 775 } 776 777 void cmpw(const Operand& dst, Register src) { 778 arithmetic_op_16(0x39, src, dst); 779 } 780 781 void testb(Register reg, const Operand& op) { testb(op, reg); } 782 783 void testw(Register reg, const Operand& op) { testw(op, reg); } 784 785 void andb(Register dst, Immediate src) { 786 immediate_arithmetic_op_8(0x4, dst, src); 787 } 788 789 void decb(Register dst); 790 void decb(const Operand& dst); 791 792 // Lock prefix. 793 void lock(); 794 795 void xchgb(Register reg, const Operand& op); 796 void xchgw(Register reg, const Operand& op); 797 798 void cmpxchgb(const Operand& dst, Register src); 799 void cmpxchgw(const Operand& dst, Register src); 800 801 // Sign-extends rax into rdx:rax. 802 void cqo(); 803 // Sign-extends eax into edx:eax. 
804 void cdq(); 805 806 // Multiply eax by src, put the result in edx:eax. 807 void mull(Register src); 808 void mull(const Operand& src); 809 // Multiply rax by src, put the result in rdx:rax. 810 void mulq(Register src); 811 812#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode) \ 813 void instruction##p(Register dst, Immediate imm8) { \ 814 shift(dst, imm8, subcode, kPointerSize); \ 815 } \ 816 \ 817 void instruction##l(Register dst, Immediate imm8) { \ 818 shift(dst, imm8, subcode, kInt32Size); \ 819 } \ 820 \ 821 void instruction##q(Register dst, Immediate imm8) { \ 822 shift(dst, imm8, subcode, kInt64Size); \ 823 } \ 824 \ 825 void instruction##p(Operand dst, Immediate imm8) { \ 826 shift(dst, imm8, subcode, kPointerSize); \ 827 } \ 828 \ 829 void instruction##l(Operand dst, Immediate imm8) { \ 830 shift(dst, imm8, subcode, kInt32Size); \ 831 } \ 832 \ 833 void instruction##q(Operand dst, Immediate imm8) { \ 834 shift(dst, imm8, subcode, kInt64Size); \ 835 } \ 836 \ 837 void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \ 838 \ 839 void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \ 840 \ 841 void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \ 842 \ 843 void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); } \ 844 \ 845 void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); } \ 846 \ 847 void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); } 848 SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION) 849#undef DECLARE_SHIFT_INSTRUCTION 850 851 // Shifts dst:src left by cl bits, affecting only dst. 852 void shld(Register dst, Register src); 853 854 // Shifts src:dst right by cl bits, affecting only dst. 
855 void shrd(Register dst, Register src); 856 857 void store_rax(void* dst, RelocInfo::Mode mode); 858 void store_rax(ExternalReference ref); 859 860 void subb(Register dst, Immediate src) { 861 immediate_arithmetic_op_8(0x5, dst, src); 862 } 863 864 void testb(Register dst, Register src); 865 void testb(Register reg, Immediate mask); 866 void testb(const Operand& op, Immediate mask); 867 void testb(const Operand& op, Register reg); 868 869 void testw(Register dst, Register src); 870 void testw(Register reg, Immediate mask); 871 void testw(const Operand& op, Immediate mask); 872 void testw(const Operand& op, Register reg); 873 874 // Bit operations. 875 void bt(const Operand& dst, Register src); 876 void bts(const Operand& dst, Register src); 877 void bsrq(Register dst, Register src); 878 void bsrq(Register dst, const Operand& src); 879 void bsrl(Register dst, Register src); 880 void bsrl(Register dst, const Operand& src); 881 void bsfq(Register dst, Register src); 882 void bsfq(Register dst, const Operand& src); 883 void bsfl(Register dst, Register src); 884 void bsfl(Register dst, const Operand& src); 885 886 // Miscellaneous 887 void clc(); 888 void cld(); 889 void cpuid(); 890 void hlt(); 891 void int3(); 892 void nop(); 893 void ret(int imm16); 894 void ud2(); 895 void setcc(Condition cc, Register reg); 896 897 // Label operations & relative jumps (PPUM Appendix D) 898 // 899 // Takes a branch opcode (cc) and a label (L) and generates 900 // either a backward branch or a forward branch and links it 901 // to the label fixup chain. Usage: 902 // 903 // Label L; // unbound label 904 // j(cc, &L); // forward branch to unbound label 905 // bind(&L); // bind label to the current pc 906 // j(cc, &L); // backward branch to bound label 907 // bind(&L); // illegal: a label may be bound only once 908 // 909 // Note: The same Label can be used for forward and backward branches 910 // but it may be bound only once. 
911 912 void bind(Label* L); // binds an unbound label L to the current code position 913 914 // Calls 915 // Call near relative 32-bit displacement, relative to next instruction. 916 void call(Label* L); 917 void call(Address entry, RelocInfo::Mode rmode); 918 void call(Handle<Code> target, 919 RelocInfo::Mode rmode = RelocInfo::CODE_TARGET, 920 TypeFeedbackId ast_id = TypeFeedbackId::None()); 921 922 // Calls directly to the given address using a relative offset. 923 // Should only ever be used in Code objects for calls within the 924 // same Code object. Should not be used when generating new code (use labels), 925 // but only when patching existing code. 926 void call(Address target); 927 928 // Call near absolute indirect, address in register 929 void call(Register adr); 930 931 // Jumps 932 // Jump short or near relative. 933 // Use a 32-bit signed displacement. 934 // Unconditional jump to L 935 void jmp(Label* L, Label::Distance distance = Label::kFar); 936 void jmp(Address entry, RelocInfo::Mode rmode); 937 void jmp(Handle<Code> target, RelocInfo::Mode rmode); 938 939 // Jump near absolute indirect (r64) 940 void jmp(Register adr); 941 void jmp(const Operand& src); 942 943 // Conditional jumps 944 void j(Condition cc, 945 Label* L, 946 Label::Distance distance = Label::kFar); 947 void j(Condition cc, Address entry, RelocInfo::Mode rmode); 948 void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode); 949 950 // Floating-point operations 951 void fld(int i); 952 953 void fld1(); 954 void fldz(); 955 void fldpi(); 956 void fldln2(); 957 958 void fld_s(const Operand& adr); 959 void fld_d(const Operand& adr); 960 961 void fstp_s(const Operand& adr); 962 void fstp_d(const Operand& adr); 963 void fstp(int index); 964 965 void fild_s(const Operand& adr); 966 void fild_d(const Operand& adr); 967 968 void fist_s(const Operand& adr); 969 970 void fistp_s(const Operand& adr); 971 void fistp_d(const Operand& adr); 972 973 void fisttp_s(const Operand& adr); 974 
  void fisttp_d(const Operand& adr);

  void fabs();
  void fchs();

  // st(0) = st(0) op st(i).
  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(const Operand& adr);

  // ...p variants: st(i) = st(i) op st(0), then pop the stack.
  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);  // exchange st(0) and st(i)
  void fincstp();
  void ffree(int i = 0);

  // Comparisons; fucomi/fucomip set EFLAGS directly, the others set the
  // x87 status word (read back via fnstsw_ax).
  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();  // copy x87 status word into ax
  void fwait();
  void fnclex();  // clear pending x87 exceptions (no wait)

  // Transcendental / scaling helpers.
  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void frndint();

  void sahf();  // store ah into the low byte of EFLAGS

  // SSE instructions
  // Scalar single-precision (ss) arithmetic: dst[31:0] = dst op src.
  void addss(XMMRegister dst, XMMRegister src);
  void addss(XMMRegister dst, const Operand& src);
  void subss(XMMRegister dst, XMMRegister src);
  void subss(XMMRegister dst, const Operand& src);
  void mulss(XMMRegister dst, XMMRegister src);
  void mulss(XMMRegister dst, const Operand& src);
  void divss(XMMRegister dst, XMMRegister src);
  void divss(XMMRegister dst, const Operand& src);

  void maxss(XMMRegister dst, XMMRegister src);
  void maxss(XMMRegister dst, const Operand& src);
  void minss(XMMRegister dst, XMMRegister src);
  void minss(XMMRegister dst, const Operand& src);

  void sqrtss(XMMRegister dst, XMMRegister src);
  void sqrtss(XMMRegister dst, const Operand& src);

  // Unordered scalar single compare; sets EFLAGS.
  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, const Operand& src);
  void movaps(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, const Operand& src);
  void movss(const Operand& dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  // Scalar single <-> 32-bit integer conversions
  // (tt = truncating, l = 32-bit integer operand).
  void cvttss2si(Register dst, const Operand& src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, const Operand& src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  // Packed-single bitwise logic.
  void andps(XMMRegister dst, XMMRegister src);
  void andps(XMMRegister dst, const Operand& src);
  void orps(XMMRegister dst, XMMRegister src);
  void orps(XMMRegister dst, const Operand& src);
  void xorps(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, const Operand& src);

  // Packed-single arithmetic (four lanes).
  void addps(XMMRegister dst, XMMRegister src);
  void addps(XMMRegister dst, const Operand& src);
  void subps(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, const Operand& src);
  void mulps(XMMRegister dst, XMMRegister src);
  void mulps(XMMRegister dst, const Operand& src);
  void divps(XMMRegister dst, XMMRegister src);
  void divps(XMMRegister dst, const Operand& src);

  void movmskps(Register dst, XMMRegister src);

  // Low-level VEX-encoded emitter used by all the v* AVX helpers below.
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);

  // SSE2 instructions
  // Generic SSE2 emitters; the per-instruction prefix/escape/opcode bytes
  // come from SSE2_INSTRUCTION_LIST in src/x64/sse-instr.h.
  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
                  byte opcode);
  void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
                  byte opcode);
// Expands to the legacy-encoded (non-VEX) member functions.
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }                                                                   \
  void instruction(XMMRegister dst, const Operand& src) {             \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

// Expands to the VEX-encoded three-operand v* counterparts.
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1,                     \
                      const Operand& src2) {                                 \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION

  // SSE3
  void lddqu(XMMRegister dst, const Operand& src);

  // SSSE3
  // Generic SSSE3 emitters; opcode bytes come from SSSE3_INSTRUCTION_LIST.
  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
                   byte escape1, byte escape2, byte opcode);

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                          \
  void instruction(XMMRegister dst, const Operand& src) {                    \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

  // SSE4
  // Generic SSE4 emitters; opcode bytes come from SSE4_INSTRUCTION_LIST.
  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
                  byte escape1, byte escape2, byte opcode);
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                         \
  void instruction(XMMRegister dst, const Operand& src) {                   \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION

// VEX forms of the SSSE3/SSE4 lists; the two escape bytes are pasted into a
// single LeadingOpcode enumerator (e.g. 0F + 3A -> k0F3A).
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1,                      \
                      const Operand& src2) {                                  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION

  // 32-bit (movd) / 64-bit (movq) moves between GP and XMM registers.
  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, const Operand& src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(const Operand& dst, XMMRegister src);
  void movsd(XMMRegister dst, const Operand& src);

  // 128-bit integer moves: movdqa requires 16-byte alignment,
  // movdqu does not.
  void movdqa(const Operand& dst, XMMRegister src);
  void movdqa(XMMRegister dst, const Operand& src);

  void movdqu(const Operand& dst, XMMRegister src);
  void movdqu(XMMRegister dst, const Operand& src);

  void movapd(XMMRegister dst, XMMRegister src);
  void movupd(XMMRegister dst, const Operand& src);
  void movupd(const Operand& dst, XMMRegister src);

  // Packed shifts by immediate (w = 16-bit, d = 32-bit, q = 64-bit lanes;
  // sll/srl = logical, sra = arithmetic).
  void psllq(XMMRegister reg, byte imm8);
  void psrlq(XMMRegister reg, byte imm8);
  void psllw(XMMRegister reg, byte imm8);
  void pslld(XMMRegister reg, byte imm8);
  void psrlw(XMMRegister reg, byte imm8);
  void psrld(XMMRegister reg, byte imm8);
  void psraw(XMMRegister reg, byte imm8);
  void psrad(XMMRegister reg, byte imm8);

  // Truncating float->integer conversions
  // (si = 32-bit destination, siq = 64-bit destination).
  void cvttsd2si(Register dst, const Operand& src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, const Operand& src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, const Operand& src);

  // Integer->float conversions (l = 32-bit source, q = 64-bit source).
  void cvtlsi2sd(XMMRegister dst, const Operand& src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, const Operand& src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, const Operand& src);
  void cvtqsi2sd(XMMRegister dst, Register src);


  // Scalar single <-> double precision conversions.
  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
  void cvtsd2ss(XMMRegister dst, const Operand& src);

  // Rounding (non-truncating) double->integer conversions.
  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  // Scalar double-precision (sd) arithmetic: dst[63:0] = dst op src.
  void addsd(XMMRegister dst, XMMRegister src);
  void addsd(XMMRegister dst, const Operand& src);
  void subsd(XMMRegister dst, XMMRegister src);
  void subsd(XMMRegister dst, const Operand& src);
  void mulsd(XMMRegister dst, XMMRegister src);
  void mulsd(XMMRegister dst, const Operand& src);
  void divsd(XMMRegister dst, XMMRegister src);
  void divsd(XMMRegister dst, const Operand& src);

  void maxsd(XMMRegister dst, XMMRegister src);
  void maxsd(XMMRegister dst, const Operand& src);
  void minsd(XMMRegister dst, XMMRegister src);
  void minsd(XMMRegister dst, const Operand& src);

  // Packed-double bitwise logic.
  void andpd(XMMRegister dst, XMMRegister src);
  void andpd(XMMRegister dst, const Operand& src);
  void orpd(XMMRegister dst, XMMRegister src);
  void orpd(XMMRegister dst, const Operand& src);
  void xorpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, const Operand& src);
  void sqrtsd(XMMRegister dst, XMMRegister src);
  void sqrtsd(XMMRegister dst, const Operand& src);

  // Unordered scalar double compare; sets EFLAGS.
  void ucomisd(XMMRegister dst, XMMRegister src);
  void ucomisd(XMMRegister dst, const Operand& src);
  void cmpltsd(XMMRegister dst, XMMRegister src);

  void movmskpd(Register dst, XMMRegister src);

  // Interleave low (punpckldq) / high (punpckhdq) 32-bit lanes.
  void punpckldq(XMMRegister dst, XMMRegister src);
  void punpckldq(XMMRegister dst, const Operand& src);
  void punpckhdq(XMMRegister dst, XMMRegister src);

  // SSE 4.1 instruction
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);
  // Extract lane imm8 (b = byte, w = word, d = dword) to a GP register
  // or memory.
  void pextrb(Register dst, XMMRegister src, int8_t imm8);
  void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
  void pextrw(Register dst, XMMRegister src, int8_t imm8);
  void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
  void pextrd(Register dst, XMMRegister src, int8_t imm8);
  void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
  // Insert into lane imm8 from a GP register or memory.
  void pinsrb(XMMRegister dst, Register src, int8_t imm8);
  void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
  void pinsrw(XMMRegister dst, Register src, int8_t imm8);
  void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
  void pinsrd(XMMRegister dst, Register src, int8_t imm8);
  void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // Packed compares; cmp selects the comparison predicate (imm8 of
  // cmpps/cmppd). Named wrappers for the common predicates follow.
  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpps(XMMRegister dst, const Operand& src, int8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmppd(XMMRegister dst, const Operand& src, int8_t cmp);

#define SSE_CMP_P(instr, imm8)                                                \
  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
  void instr##ps(XMMRegister dst, const Operand& src) {                       \
    cmpps(dst, src, imm8);                                                    \
  }                                                                           \
  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, const Operand& src) { cmppd(dst, src, imm8); }

  SSE_CMP_P(cmpeq, 0x0);
  SSE_CMP_P(cmplt, 0x1);
  SSE_CMP_P(cmple, 0x2);
  SSE_CMP_P(cmpneq, 0x4);
  SSE_CMP_P(cmpnlt, 0x5);
  SSE_CMP_P(cmpnle, 0x6);

#undef SSE_CMP_P

  // Packed-single min/max, reciprocal estimates, square root and
  // unaligned moves.
  void minps(XMMRegister dst, XMMRegister src);
  void minps(XMMRegister dst, const Operand& src);
  void maxps(XMMRegister dst, XMMRegister src);
  void maxps(XMMRegister dst, const Operand& src);
  void rcpps(XMMRegister dst, XMMRegister src);
  void rcpps(XMMRegister dst, const Operand& src);
  void rsqrtps(XMMRegister dst, XMMRegister src);
  void rsqrtps(XMMRegister dst, const Operand& src);
  void sqrtps(XMMRegister dst, XMMRegister src);
  void sqrtps(XMMRegister dst, const Operand& src);
  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, const Operand& src);
  void movups(const Operand& dst, XMMRegister src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void cvtdq2ps(XMMRegister dst, const Operand& src);

  // AVX instruction
  // FMA3, scalar double precision. The digits name the operand roles per
  // the Intel mnemonic scheme: vfmaddXYZsd computes opX * opZ + opY (with
  // 132/213/231 selecting which of dst/src1/src2 plays which role); each
  // wrapper just forwards the fixed opcode byte to vfmasd.
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  // Shared emitters for the scalar-double FMA forms above.
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  // FMA3, scalar single precision (same opcode bytes, vfmass emitter).
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  // Shared emitters for the scalar-single FMA forms above.
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  // VEX-encoded GP <-> XMM moves (d = 32-bit, q = 64-bit).
  void vmovd(XMMRegister dst, Register src);
  void vmovd(XMMRegister dst, const Operand& src);
  void vmovd(Register dst, XMMRegister src);
  void vmovq(XMMRegister dst, Register src);
  void vmovq(XMMRegister dst, const Operand& src);
  void vmovq(Register dst, XMMRegister src);

  // vmovsd: load (0x10) / store (0x11); xmm0 fills the unused VEX.vvvv
  // slot in the memory forms.
  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x10, dst, src1, src2);
  }
  void vmovsd(XMMRegister dst, const Operand& src) {
    vsd(0x10, dst, xmm0, src);
  }
  void vmovsd(const Operand& dst, XMMRegister src) {
    vsd(0x11, src, xmm0, dst);
  }

// AVX_SP_3 declares the instruction for all four scalar/packed
// single/double variants; AVX_3 generates the register and memory
// overloads dispatching through the given emitter (vss/vsd/vps/vpd).
#define AVX_SP_3(instr, opcode) \
  AVX_S_3(instr, opcode)        \
  AVX_P_3(instr, opcode)

#define AVX_S_3(instr, opcode)  \
  AVX_3(instr##ss, opcode, vss) \
  AVX_3(instr##sd, opcode, vsd)

#define AVX_P_3(instr, opcode)  \
  AVX_3(instr##ps, opcode, vps) \
  AVX_3(instr##pd, opcode, vpd)

#define AVX_3(instr, opcode, impl)                                  \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    impl(opcode, dst, src1, src2);                                  \
  }                                                                 \
  void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    impl(opcode, dst, src1, src2);                                  \
  }

  AVX_SP_3(vsqrt, 0x51);
  AVX_SP_3(vadd, 0x58);
  AVX_SP_3(vsub, 0x5c);
  AVX_SP_3(vmul, 0x59);
  AVX_SP_3(vdiv, 0x5e);
  AVX_SP_3(vmin, 0x5d);
  AVX_SP_3(vmax, 0x5f);
  AVX_P_3(vand, 0x54);
  AVX_P_3(vor, 0x56);
  AVX_P_3(vxor, 0x57);
  AVX_3(vcvtsd2ss, 0x5a, vsd);

#undef AVX_3
#undef AVX_S_3
#undef AVX_P_3
#undef AVX_SP_3

  // Shift-by-immediate: the pseudo-register ({2}, {6}) is the /r opcode
  // extension selecting psrlq/psllq within opcode 0x73.
  void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {2};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {6};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  // Integer -> float conversions; the GP register is re-wrapped as an
  // XMMRegister with the same code so it can travel through vinstr.
  // kW0 = 32-bit integer operand, kW1 = 64-bit.
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  // Truncating float -> integer conversions (opcode 0x2c); same
  // GP-as-XMM wrapping for the destination.
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  // Rounding double -> 32-bit integer conversion (opcode 0x2d).
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, const Operand& src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }

  // vsd: scalar-double VEX emitter (F2 prefix, 0F map).
  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }

  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, const Operand& src) {
    vss(0x10, dst, xmm0, src);
  }
  void vmovss(const Operand& dst, XMMRegister src) {
    vss(0x11, src, xmm0, dst);
  }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, const Operand& src);
  // vss: scalar-single VEX emitter.
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  // VEX-encoded aligned/unaligned packed moves.
  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
  void vmovups(XMMRegister dst, const Operand& src) {
    vps(0x10, dst, xmm0, src);
  }
  void vmovups(const Operand& dst, XMMRegister src) {
    vps(0x11, src, xmm0, dst);
  }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, const Operand& src) {
    vpd(0x10, dst, xmm0, src);
  }
  void vmovupd(const Operand& dst, XMMRegister src) {
    vpd(0x11, src, xmm0, dst);
  }
  void vmovmskps(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vpd(0x50, idst, xmm0, src);
  }
  // Packed compares (opcode 0xC2); cmp is the predicate immediate.
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }

// Named wrappers for the common compare predicates (mirrors SSE_CMP_P).
#define AVX_CMP_P(instr, imm8)                                             \
  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmppd(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  AVX_CMP_P(vcmpeq, 0x0);
  AVX_CMP_P(vcmplt, 0x1);
  AVX_CMP_P(vcmple, 0x2);
  AVX_CMP_P(vcmpneq, 0x4);
  AVX_CMP_P(vcmpnlt, 0x5);
  AVX_CMP_P(vcmpnle, 0x6);

#undef AVX_CMP_P

  void vlddqu(XMMRegister dst, const Operand& src) {
    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
  }
  // VEX shift-by-immediate; the pseudo-register is the opcode extension
  // (/6 = shift left logical, /2 = shift right logical, /4 = arithmetic)
  // within group opcodes 0x71 (words) and 0x72 (dwords).
  void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {6};
    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {2};
    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {4};
    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {6};
    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {2};
    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
    XMMRegister iop = {4};
    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  // Lane extract/insert. Note the operand order passed to vinstr varies
  // per instruction to match each encoding's reg/rm roles (e.g. 0x14/0x16
  // store the XMM source in ModRM.reg, while 0xc5 reads it from ModRM.rm).
  void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
    emit(imm8);
  }
  void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }

  // Packed-single (vps) / packed-double (vpd) VEX emitters.
  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  // BMI instruction
  // q = 64-bit, l = 32-bit operand size throughout.
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, const Operand& src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, const Operand& src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  // BEXTR encodes the control operand in VEX.vvvv, hence src2 and src1
  // are swapped when forwarded to the emitter.
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, const Operand& src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, const Operand& src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  // BLSI/BLSMSK/BLSR share opcode 0xf3; the pseudo-register ({3}, {2},
  // {1}) is the opcode extension selecting the operation.
  void blsiq(Register dst, Register src) {
    Register ireg = {3};
    bmi1q(0xf3, ireg, dst, src);
  }
void blsiq(Register dst, const Operand& src) { 1832 Register ireg = {3}; 1833 bmi1q(0xf3, ireg, dst, src); 1834 } 1835 void blsil(Register dst, Register src) { 1836 Register ireg = {3}; 1837 bmi1l(0xf3, ireg, dst, src); 1838 } 1839 void blsil(Register dst, const Operand& src) { 1840 Register ireg = {3}; 1841 bmi1l(0xf3, ireg, dst, src); 1842 } 1843 void blsmskq(Register dst, Register src) { 1844 Register ireg = {2}; 1845 bmi1q(0xf3, ireg, dst, src); 1846 } 1847 void blsmskq(Register dst, const Operand& src) { 1848 Register ireg = {2}; 1849 bmi1q(0xf3, ireg, dst, src); 1850 } 1851 void blsmskl(Register dst, Register src) { 1852 Register ireg = {2}; 1853 bmi1l(0xf3, ireg, dst, src); 1854 } 1855 void blsmskl(Register dst, const Operand& src) { 1856 Register ireg = {2}; 1857 bmi1l(0xf3, ireg, dst, src); 1858 } 1859 void blsrq(Register dst, Register src) { 1860 Register ireg = {1}; 1861 bmi1q(0xf3, ireg, dst, src); 1862 } 1863 void blsrq(Register dst, const Operand& src) { 1864 Register ireg = {1}; 1865 bmi1q(0xf3, ireg, dst, src); 1866 } 1867 void blsrl(Register dst, Register src) { 1868 Register ireg = {1}; 1869 bmi1l(0xf3, ireg, dst, src); 1870 } 1871 void blsrl(Register dst, const Operand& src) { 1872 Register ireg = {1}; 1873 bmi1l(0xf3, ireg, dst, src); 1874 } 1875 void tzcntq(Register dst, Register src); 1876 void tzcntq(Register dst, const Operand& src); 1877 void tzcntl(Register dst, Register src); 1878 void tzcntl(Register dst, const Operand& src); 1879 1880 void lzcntq(Register dst, Register src); 1881 void lzcntq(Register dst, const Operand& src); 1882 void lzcntl(Register dst, Register src); 1883 void lzcntl(Register dst, const Operand& src); 1884 1885 void popcntq(Register dst, Register src); 1886 void popcntq(Register dst, const Operand& src); 1887 void popcntl(Register dst, Register src); 1888 void popcntl(Register dst, const Operand& src); 1889 1890 void bzhiq(Register dst, Register src1, Register src2) { 1891 bmi2q(kNone, 0xf5, dst, src2, src1); 1892 } 
  void bzhiq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, const Operand& src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  // mulx writes two destinations (high/low product halves).
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, const Operand& src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, const Operand& src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  // pdep/pext share opcode 0xf5 and are distinguished only by the SIMD
  // prefix (kF2 vs kF3); arguments are passed in natural order.
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  // VEX-encoded shifts (sarx/shlx/shrx) all use opcode 0xf7, selected by the
  // SIMD prefix; src2 rides in the vreg slot, so arguments are swapped.
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, const Operand& src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, const Operand& src, byte imm8);

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }

  // Mark generator continuation.
  void RecordGeneratorContinuation();

  // Mark address of a debug break slot.
  void RecordDebugBreakSlot(RelocInfo::Mode mode);

  // Record a comment relocation entry that can be used by a disassembler.
  // Use --code-comments to enable.
  void RecordComment(const char* msg);

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
1996 void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position, 1997 int id); 1998 1999 void PatchConstantPoolAccessInstruction(int pc_offset, int offset, 2000 ConstantPoolEntry::Access access, 2001 ConstantPoolEntry::Type type) { 2002 // No embedded constant pool support. 2003 UNREACHABLE(); 2004 } 2005 2006 // Writes a single word of data in the code stream. 2007 // Used for inline tables, e.g., jump-tables. 2008 void db(uint8_t data); 2009 void dd(uint32_t data); 2010 void dq(uint64_t data); 2011 void dp(uintptr_t data) { dq(data); } 2012 void dq(Label* label); 2013 2014 // Check if there is less than kGap bytes available in the buffer. 2015 // If this is the case, we need to grow the buffer before emitting 2016 // an instruction or relocation information. 2017 inline bool buffer_overflow() const { 2018 return pc_ >= reloc_info_writer.pos() - kGap; 2019 } 2020 2021 // Get the number of bytes available in the buffer. 2022 inline int available_space() const { 2023 return static_cast<int>(reloc_info_writer.pos() - pc_); 2024 } 2025 2026 static bool IsNop(Address addr); 2027 2028 // Avoid overflows for displacements etc. 
2029 static const int kMaximalBufferSize = 512*MB; 2030 2031 byte byte_at(int pos) { return buffer_[pos]; } 2032 void set_byte_at(int pos, byte value) { buffer_[pos] = value; } 2033 2034 Address pc() const { return pc_; } 2035 2036 protected: 2037 // Call near indirect 2038 void call(const Operand& operand); 2039 2040 private: 2041 byte* addr_at(int pos) { return buffer_ + pos; } 2042 uint32_t long_at(int pos) { 2043 return *reinterpret_cast<uint32_t*>(addr_at(pos)); 2044 } 2045 void long_at_put(int pos, uint32_t x) { 2046 *reinterpret_cast<uint32_t*>(addr_at(pos)) = x; 2047 } 2048 2049 // code emission 2050 void GrowBuffer(); 2051 2052 void emit(byte x) { *pc_++ = x; } 2053 inline void emitl(uint32_t x); 2054 inline void emitp(void* x, RelocInfo::Mode rmode); 2055 inline void emitq(uint64_t x); 2056 inline void emitw(uint16_t x); 2057 inline void emit_code_target(Handle<Code> target, 2058 RelocInfo::Mode rmode, 2059 TypeFeedbackId ast_id = TypeFeedbackId::None()); 2060 inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode); 2061 void emit(Immediate x) { 2062 if (!RelocInfo::IsNone(x.rmode_)) { 2063 RecordRelocInfo(x.rmode_); 2064 } 2065 emitl(x.value_); 2066 } 2067 2068 // Emits a REX prefix that encodes a 64-bit operand size and 2069 // the top bit of both register codes. 2070 // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B. 2071 // REX.W is set. 2072 inline void emit_rex_64(XMMRegister reg, Register rm_reg); 2073 inline void emit_rex_64(Register reg, XMMRegister rm_reg); 2074 inline void emit_rex_64(Register reg, Register rm_reg); 2075 2076 // Emits a REX prefix that encodes a 64-bit operand size and 2077 // the top bit of the destination, index, and base register codes. 2078 // The high bit of reg is used for REX.R, the high bit of op's base 2079 // register is used for REX.B, and the high bit of op's index register 2080 // is used for REX.X. REX.W is set. 
  inline void emit_rex_64(Register reg, const Operand& op);
  inline void emit_rex_64(XMMRegister reg, const Operand& op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(const Operand& op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared.
  inline void emit_rex_32(Register reg, const Operand& op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(const Operand& op);

  // The emit_optional_rex_32 family emits a REX prefix only when one of the
  // encoded register numbers actually needs a REX bit; otherwise no byte is
  // emitted at all.

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared. If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared. If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, const Operand& op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM registers.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, const Operand&), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(const Operand&) if the operand register
  // numbers have a high bit set.
2150 inline void emit_optional_rex_32(const Operand& op); 2151 2152 void emit_rex(int size) { 2153 if (size == kInt64Size) { 2154 emit_rex_64(); 2155 } else { 2156 DCHECK(size == kInt32Size); 2157 } 2158 } 2159 2160 template<class P1> 2161 void emit_rex(P1 p1, int size) { 2162 if (size == kInt64Size) { 2163 emit_rex_64(p1); 2164 } else { 2165 DCHECK(size == kInt32Size); 2166 emit_optional_rex_32(p1); 2167 } 2168 } 2169 2170 template<class P1, class P2> 2171 void emit_rex(P1 p1, P2 p2, int size) { 2172 if (size == kInt64Size) { 2173 emit_rex_64(p1, p2); 2174 } else { 2175 DCHECK(size == kInt32Size); 2176 emit_optional_rex_32(p1, p2); 2177 } 2178 } 2179 2180 // Emit vex prefix 2181 void emit_vex2_byte0() { emit(0xc5); } 2182 inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l, 2183 SIMDPrefix pp); 2184 void emit_vex3_byte0() { emit(0xc4); } 2185 inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m); 2186 inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm, 2187 LeadingOpcode m); 2188 inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l, 2189 SIMDPrefix pp); 2190 inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm, 2191 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2192 VexW w); 2193 inline void emit_vex_prefix(Register reg, Register v, Register rm, 2194 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2195 VexW w); 2196 inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm, 2197 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2198 VexW w); 2199 inline void emit_vex_prefix(Register reg, Register v, const Operand& rm, 2200 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2201 VexW w); 2202 2203 // Emit the ModR/M byte, and optionally the SIB byte and 2204 // 1- or 4-byte offset for a memory operand. Also encodes 2205 // the second operand of the operation, a register or operation 2206 // subcode, into the reg field of the ModR/M byte. 
2207 void emit_operand(Register reg, const Operand& adr) { 2208 emit_operand(reg.low_bits(), adr); 2209 } 2210 2211 // Emit the ModR/M byte, and optionally the SIB byte and 2212 // 1- or 4-byte offset for a memory operand. Also used to encode 2213 // a three-bit opcode extension into the ModR/M byte. 2214 void emit_operand(int rm, const Operand& adr); 2215 2216 // Emit a ModR/M byte with registers coded in the reg and rm_reg fields. 2217 void emit_modrm(Register reg, Register rm_reg) { 2218 emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits()); 2219 } 2220 2221 // Emit a ModR/M byte with an operation subcode in the reg field and 2222 // a register in the rm_reg field. 2223 void emit_modrm(int code, Register rm_reg) { 2224 DCHECK(is_uint3(code)); 2225 emit(0xC0 | code << 3 | rm_reg.low_bits()); 2226 } 2227 2228 // Emit the code-object-relative offset of the label's position 2229 inline void emit_code_relative_offset(Label* label); 2230 2231 // The first argument is the reg field, the second argument is the r/m field. 2232 void emit_sse_operand(XMMRegister dst, XMMRegister src); 2233 void emit_sse_operand(XMMRegister reg, const Operand& adr); 2234 void emit_sse_operand(Register reg, const Operand& adr); 2235 void emit_sse_operand(XMMRegister dst, Register src); 2236 void emit_sse_operand(Register dst, XMMRegister src); 2237 void emit_sse_operand(XMMRegister dst); 2238 2239 // Emit machine code for one of the operations ADD, ADC, SUB, SBC, 2240 // AND, OR, XOR, or CMP. The encodings of these operations are all 2241 // similar, differing just in the opcode or in the reg field of the 2242 // ModR/M byte. 
2243 void arithmetic_op_8(byte opcode, Register reg, Register rm_reg); 2244 void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg); 2245 void arithmetic_op_16(byte opcode, Register reg, Register rm_reg); 2246 void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg); 2247 // Operate on operands/registers with pointer size, 32-bit or 64-bit size. 2248 void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size); 2249 void arithmetic_op(byte opcode, 2250 Register reg, 2251 const Operand& rm_reg, 2252 int size); 2253 // Operate on a byte in memory or register. 2254 void immediate_arithmetic_op_8(byte subcode, 2255 Register dst, 2256 Immediate src); 2257 void immediate_arithmetic_op_8(byte subcode, 2258 const Operand& dst, 2259 Immediate src); 2260 // Operate on a word in memory or register. 2261 void immediate_arithmetic_op_16(byte subcode, 2262 Register dst, 2263 Immediate src); 2264 void immediate_arithmetic_op_16(byte subcode, 2265 const Operand& dst, 2266 Immediate src); 2267 // Operate on operands/registers with pointer size, 32-bit or 64-bit size. 2268 void immediate_arithmetic_op(byte subcode, 2269 Register dst, 2270 Immediate src, 2271 int size); 2272 void immediate_arithmetic_op(byte subcode, 2273 const Operand& dst, 2274 Immediate src, 2275 int size); 2276 2277 // Emit machine code for a shift operation. 2278 void shift(Operand dst, Immediate shift_amount, int subcode, int size); 2279 void shift(Register dst, Immediate shift_amount, int subcode, int size); 2280 // Shift dst by cl % 64 bits. 
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);

  // Emit an x87 instruction: two opcode bytes with |i| folded into b2.
  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // Arithmetics
  // The reg,reg / reg,mem direction uses one opcode (e.g. 0x03 for add),
  // the mem,reg direction another (e.g. 0x01); immediate forms pass the
  // 3-bit opcode extension instead.
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, const Operand& src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(const Operand& dst, Register src, int size) {
    // Note the swapped argument order: arithmetic_op takes (reg, rm).
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, const Operand& src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(const Operand& dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, const Operand& src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  // Compare {al,ax,eax,rax} with src. If equal, set ZF and write dst into
  // src. Otherwise clear ZF and write src into {al,ax,eax,rax}. This
  // operation is only atomic if prefixed by the lock instruction.
  void emit_cmpxchg(const Operand& dst, Register src, int size);

  void emit_dec(Register dst, int size);
  void emit_dec(const Operand& dst, int size);

  // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  void emit_div(Register src, int size);

  // Signed multiply instructions.
  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
  void emit_imul(Register src, int size);
  void emit_imul(const Operand& src, int size);
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, const Operand& src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, const Operand& src, Immediate imm, int size);

  void emit_inc(Register dst, int size);
  void emit_inc(const Operand& dst, int size);

  void emit_lea(Register dst, const Operand& src, int size);

  void emit_mov(Register dst, const Operand& src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(const Operand& dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(const Operand& dst, Immediate value, int size);

  void emit_movzxb(Register dst, const Operand& src, int size);
  void emit_movzxb(Register dst, Register src, int size);

  void emit_movzxw(Register dst, const Operand& src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  void emit_neg(Register dst, int size);
  void emit_neg(const Operand& dst, int size);

  void emit_not(Register dst, int size);
  void emit_not(const Operand& dst, int size);

  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, const Operand& src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(const Operand& dst, Register src, int size) {
    // arithmetic_op takes (reg, rm), hence the swapped arguments.
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_repmovs(int size);

  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, const Operand& src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(const Operand& dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(const Operand& op, Register reg, int size);
  void emit_test(const Operand& op, Immediate mask, int size);
  // test is symmetric in its operands, so the (reg, mem) form simply
  // forwards to the (mem, reg) form.
  void emit_test(Register reg, const Operand& op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, const Operand& src, int size);

  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore
      // there is no need to make this a 64 bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }

  void emit_xor(Register dst, const Operand& src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }

  // Most BMI instructions are similar.
2483 void bmi1q(byte op, Register reg, Register vreg, Register rm); 2484 void bmi1q(byte op, Register reg, Register vreg, const Operand& rm); 2485 void bmi1l(byte op, Register reg, Register vreg, Register rm); 2486 void bmi1l(byte op, Register reg, Register vreg, const Operand& rm); 2487 void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm); 2488 void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, 2489 const Operand& rm); 2490 void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm); 2491 void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, 2492 const Operand& rm); 2493 2494 friend class CodePatcher; 2495 friend class EnsureSpace; 2496 friend class RegExpMacroAssemblerX64; 2497 2498 // code generation 2499 RelocInfoWriter reloc_info_writer; 2500 2501 // Internal reference positions, required for (potential) patching in 2502 // GrowBuffer(); contains only those internal references whose labels 2503 // are already bound. 2504 std::deque<int> internal_reference_positions_; 2505 2506 List< Handle<Code> > code_targets_; 2507}; 2508 2509 2510// Helper class that ensures that there is enough space for generating 2511// instructions and relocation information. The constructor makes 2512// sure that there is enough space and (in debug mode) the destructor 2513// checks that we did not generate too much. 
2514class EnsureSpace BASE_EMBEDDED { 2515 public: 2516 explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) { 2517 if (assembler_->buffer_overflow()) assembler_->GrowBuffer(); 2518#ifdef DEBUG 2519 space_before_ = assembler_->available_space(); 2520#endif 2521 } 2522 2523#ifdef DEBUG 2524 ~EnsureSpace() { 2525 int bytes_generated = space_before_ - assembler_->available_space(); 2526 DCHECK(bytes_generated < assembler_->kGap); 2527 } 2528#endif 2529 2530 private: 2531 Assembler* assembler_; 2532#ifdef DEBUG 2533 int space_before_; 2534#endif 2535}; 2536 2537} // namespace internal 2538} // namespace v8 2539 2540#endif // V8_X64_ASSEMBLER_X64_H_ 2541