assembler-x64.h revision f91f0611dbaf29ca0f1d4aecb357ce243a19d2fa
1// Copyright (c) 1994-2006 Sun Microsystems Inc.
2// All Rights Reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// - Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10//
11// - Redistribution in binary form must reproduce the above copyright
12// notice, this list of conditions and the following disclaimer in the
13// documentation and/or other materials provided with the distribution.
14//
15// - Neither the name of Sun Microsystems or the names of contributors may
16// be used to endorse or promote products derived from this software without
17// specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
// The original source code covered by the above license has been
32// modified significantly by Google Inc.
33// Copyright 2012 the V8 project authors. All rights reserved.
34
35// A lightweight X64 Assembler.
36
37#ifndef V8_X64_ASSEMBLER_X64_H_
38#define V8_X64_ASSEMBLER_X64_H_
39
40#include <deque>
41
42#include "src/assembler.h"
43
44namespace v8 {
45namespace internal {
46
47// Utility functions
48
// X64 general-purpose registers, listed in hardware encoding order
// (rax = 0 ... r15 = 15); V(name) is expanded once per register.
#define GENERAL_REGISTERS(V) \
  V(rax)                     \
  V(rcx)                     \
  V(rdx)                     \
  V(rbx)                     \
  V(rsp)                     \
  V(rbp)                     \
  V(rsi)                     \
  V(rdi)                     \
  V(r8)                      \
  V(r9)                      \
  V(r10)                     \
  V(r11)                     \
  V(r12)                     \
  V(r13)                     \
  V(r14)                     \
  V(r15)
66
// General-purpose registers available to the register allocator. Omits
// rsp/rbp (stack and frame pointers) and r10/r13 — presumably reserved for
// internal use (scratch/root register); confirm against the macro-assembler.
#define ALLOCATABLE_GENERAL_REGISTERS(V) \
  V(rax)                                 \
  V(rbx)                                 \
  V(rdx)                                 \
  V(rcx)                                 \
  V(rsi)                                 \
  V(rdi)                                 \
  V(r8)                                  \
  V(r9)                                  \
  V(r11)                                 \
  V(r12)                                 \
  V(r14)                                 \
  V(r15)
80
81
82// CPU Registers.
83//
84// 1) We would prefer to use an enum, but enum values are assignment-
85// compatible with int, which has caused code-generation bugs.
86//
87// 2) We would prefer to use a class instead of a struct but we don't like
88// the register initialization to depend on the particular initialization
89// order (which appears to be different on OS X, Linux, and Windows for the
90// installed versions of C++ we tried). Using a struct permits C-style
91// "initialization". Also, the Register objects cannot be const as this
92// forces initialization stubs in MSVC, making us dependent on initialization
93// order.
94//
95// 3) By not using an enum, we are possibly preventing the compiler from
96// doing certain constant folds, which may significantly reduce the
97// code generated for some assembly instructions (because they boil down
98// to a few constants). If this is a problem, we could change the code
99// such that we use an enum in optimized mode, and the struct in debug
100// mode. This way we get the compile-time error checking in debug mode
101// and best performance in optimized code.
102//
// A general-purpose x64 register. See the numbered comments above for why
// this is a struct rather than an enum or a class.
struct Register {
  // One enumerator per register, in hardware encoding order; the enumerator
  // value is the register's numeric encoding.
  enum Code {
#define REGISTER_CODE(R) kCode_##R,
    GENERAL_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
        kAfterLast,
    kCode_no_reg = -1
  };

  static const int kNumRegisters = Code::kAfterLast;

  // Builds a Register from its numeric code; out-of-range codes abort in
  // debug builds.
  static Register from_code(int code) {
    DCHECK(code >= 0);
    DCHECK(code < kNumRegisters);
    Register r = {code};
    return r;
  }
  bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; }
  bool is(Register reg) const { return reg_code == reg.reg_code; }
  int code() const {
    DCHECK(is_valid());
    return reg_code;
  }
  // Single-bit mask for this register, for use in register sets/masks.
  int bit() const {
    DCHECK(is_valid());
    return 1 << reg_code;
  }

  // True for rax, rcx, rdx, rbx (codes 0-3) — the registers whose byte
  // forms are encodable without a REX prefix.
  bool is_byte_register() const { return reg_code <= 3; }
  // Return the high bit of the register code as a 0 or 1.  Used often
  // when constructing the REX prefix byte.
  int high_bit() const { return reg_code >> 3; }
  // Return the 3 low bits of the register code.  Used when encoding registers
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const { return reg_code & 0x7; }

  // Unfortunately we can't make this private in a struct when initializing
  // by assignment.
  int reg_code;
};
143
144
// Define a const Register object for every general-purpose register,
// e.g. "const Register rax = {Register::kCode_rax};".
#define DECLARE_REGISTER(R) const Register R = {Register::kCode_##R};
GENERAL_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
// Sentinel used where no register applies.
const Register no_reg = {Register::kCode_no_reg};
149
150
#ifdef _WIN64
  // Windows calling convention
// The Microsoft x64 ABI passes the first four integer/pointer arguments
// in rcx, rdx, r8, r9.
const Register arg_reg_1 = {Register::kCode_rcx};
const Register arg_reg_2 = {Register::kCode_rdx};
const Register arg_reg_3 = {Register::kCode_r8};
const Register arg_reg_4 = {Register::kCode_r9};
#else
  // AMD64 calling convention
// The System V AMD64 ABI passes the first integer/pointer arguments in
// rdi, rsi, rdx, rcx (then r8, r9).
const Register arg_reg_1 = {Register::kCode_rdi};
const Register arg_reg_2 = {Register::kCode_rsi};
const Register arg_reg_3 = {Register::kCode_rdx};
const Register arg_reg_4 = {Register::kCode_rcx};
#endif  // _WIN64
164
165
// X64 SSE registers xmm0-xmm15, in hardware encoding order.
#define DOUBLE_REGISTERS(V) \
  V(xmm0)                   \
  V(xmm1)                   \
  V(xmm2)                   \
  V(xmm3)                   \
  V(xmm4)                   \
  V(xmm5)                   \
  V(xmm6)                   \
  V(xmm7)                   \
  V(xmm8)                   \
  V(xmm9)                   \
  V(xmm10)                  \
  V(xmm11)                  \
  V(xmm12)                  \
  V(xmm13)                  \
  V(xmm14)                  \
  V(xmm15)

// Single-precision and 128-bit SIMD values live in the same xmm register
// file as doubles, so these lists are identical.
#define FLOAT_REGISTERS DOUBLE_REGISTERS
#define SIMD128_REGISTERS DOUBLE_REGISTERS
186
// xmm registers available to the register allocator. xmm15 is excluded —
// presumably reserved for internal use (scratch double register); confirm
// against the macro-assembler.
#define ALLOCATABLE_DOUBLE_REGISTERS(V) \
  V(xmm0)                               \
  V(xmm1)                               \
  V(xmm2)                               \
  V(xmm3)                               \
  V(xmm4)                               \
  V(xmm5)                               \
  V(xmm6)                               \
  V(xmm7)                               \
  V(xmm8)                               \
  V(xmm9)                               \
  V(xmm10)                              \
  V(xmm11)                              \
  V(xmm12)                              \
  V(xmm13)                              \
  V(xmm14)

// Float, double, and SIMD128 values all occupy the same xmm name space
// (see the FLOAT_REGISTERS/SIMD128_REGISTERS aliases), so FP register
// aliasing checks are simple.
static const bool kSimpleFPAliasing = true;
205
206struct XMMRegister {
207  enum Code {
208#define REGISTER_CODE(R) kCode_##R,
209    DOUBLE_REGISTERS(REGISTER_CODE)
210#undef REGISTER_CODE
211        kAfterLast,
212    kCode_no_reg = -1
213  };
214
215  static const int kMaxNumRegisters = Code::kAfterLast;
216
217  static XMMRegister from_code(int code) {
218    XMMRegister result = {code};
219    return result;
220  }
221
222  bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; }
223  bool is(XMMRegister reg) const { return reg_code == reg.reg_code; }
224  int code() const {
225    DCHECK(is_valid());
226    return reg_code;
227  }
228
229  // Return the high bit of the register code as a 0 or 1.  Used often
230  // when constructing the REX prefix byte.
231  int high_bit() const { return reg_code >> 3; }
232  // Return the 3 low bits of the register code.  Used when encoding registers
233  // in modR/M, SIB, and opcode bytes.
234  int low_bits() const { return reg_code & 0x7; }
235
236  // Unfortunately we can't make this private in a struct when initializing
237  // by assignment.
238  int reg_code;
239};
240
// All FP/SIMD value kinds are held in the xmm register file.
typedef XMMRegister FloatRegister;

typedef XMMRegister DoubleRegister;

typedef XMMRegister Simd128Register;

// Define a const DoubleRegister object for every xmm register.
#define DECLARE_REGISTER(R) \
  const DoubleRegister R = {DoubleRegister::kCode_##R};
DOUBLE_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
// Sentinel used where no xmm register applies.
const DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg};
252
// Condition codes for conditional instructions (Jcc/SETcc/CMOVcc). The
// enumerator values are the x64 condition-code encodings: they are OR'ed
// directly into opcode bytes (see kJccShortPrefix below), and each
// condition's negation differs only in the lowest bit (see NegateCondition).
enum Condition {
  // any value < 0 is considered no_condition
  no_condition  = -1,

  overflow      =  0,
  no_overflow   =  1,
  below         =  2,
  above_equal   =  3,
  equal         =  4,
  not_equal     =  5,
  below_equal   =  6,
  above         =  7,
  negative      =  8,
  positive      =  9,
  parity_even   = 10,
  parity_odd    = 11,
  less          = 12,
  greater_equal = 13,
  less_equal    = 14,
  greater       = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always        = 16,
  never         = 17,
  // aliases
  carry         = below,
  not_carry     = above_equal,
  zero          = equal,
  not_zero      = not_equal,
  sign          = negative,
  not_sign      = positive,
  last_condition = greater
};
287
288
289// Returns the equivalent of !cc.
290// Negation of the default no_condition (-1) results in a non-default
291// no_condition value (-2). As long as tests for no_condition check
292// for condition < 0, this will work as expected.
293inline Condition NegateCondition(Condition cc) {
294  return static_cast<Condition>(cc ^ 1);
295}
296
297
298// Commute a condition such that {a cond b == b cond' a}.
299inline Condition CommuteCondition(Condition cc) {
300  switch (cc) {
301    case below:
302      return above;
303    case above:
304      return below;
305    case above_equal:
306      return below_equal;
307    case below_equal:
308      return above_equal;
309    case less:
310      return greater;
311    case greater:
312      return less;
313    case greater_equal:
314      return less_equal;
315    case less_equal:
316      return greater_equal;
317    default:
318      return cc;
319  }
320}
321
322
// Rounding modes for SSE floating-point rounding instructions.
// NOTE(review): values appear to match the x86 rounding-control encoding
// (nearest = 0, down = 1, up = 2, toward zero = 3) — confirm against the
// instruction-set manual before relying on the raw values elsewhere.
enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
329
330
331// -----------------------------------------------------------------------------
332// Machine instruction Immediates
333
// A 32-bit immediate operand, optionally carrying relocation information.
class Immediate BASE_EMBEDDED {
 public:
  explicit Immediate(int32_t value) : value_(value) {}
  // Immediate whose emitted value participates in relocation; rmode records
  // how the relocator should treat it.
  explicit Immediate(int32_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  // Encodes a Smi directly as a 32-bit immediate: the tagged pointer bits
  // are reinterpreted as the immediate value, which only fits when Smis are
  // 31 bits wide (checked below).
  explicit Immediate(Smi* value) {
    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
    value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value));
  }

 private:
  int32_t value_;
  // Defaults to "no relocation needed" for plain integer immediates.
  RelocInfo::Mode rmode_ = RelocInfo::NONE32;

  friend class Assembler;
};
350
351
352// -----------------------------------------------------------------------------
353// Machine instruction Operands
354
// Index scaling for memory operands. The enumerator value is the 2-bit
// "scale" field of the SIB byte; the multiplier applied to the index
// register is 1 << scale.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,
  times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
};
363
364
// A memory operand: base register, optional scaled index, and displacement
// (or a rip/label-relative address). The ModR/M, SIB, and displacement
// bytes are pre-encoded into buf_ by the constructors via the set_*
// helpers below.
class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // [rip + disp/r]
  explicit Operand(Label* label);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

 private:
  // REX prefix bits needed for this operand — presumably contributed by the
  // base/index registers in the constructors; zero when no REX is required.
  byte rex_;
  // Encoded ModR/M byte, optional SIB byte, and displacement. Worst case is
  // ModR/M plus a 64-bit displacement for label operands (see set_disp64):
  // 1 + 8 = 9 bytes.
  byte buf_[9];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);
  inline void set_disp64(int64_t disp);  // for labels.

  friend class Assembler;
};
422
// Instructions that come in pointer-size ('p'), 32-bit ('l') and 64-bit
// ('q') variants; the variants are generated by the DECLARE_INSTRUCTION
// macro inside Assembler below.
#define ASSEMBLER_INSTRUCTION_LIST(V) \
  V(add)                              \
  V(and)                              \
  V(cmp)                              \
  V(cmpxchg)                          \
  V(dec)                              \
  V(idiv)                             \
  V(div)                              \
  V(imul)                             \
  V(inc)                              \
  V(lea)                              \
  V(mov)                              \
  V(movzxb)                           \
  V(movzxw)                           \
  V(neg)                              \
  V(not)                              \
  V(or)                               \
  V(repmovs)                          \
  V(sbb)                              \
  V(sub)                              \
  V(test)                             \
  V(xchg)                             \
  V(xor)
446
// Shift instructions on operands/registers with kPointerSize, kInt32Size and
// kInt64Size.
// The second argument is the instruction's subcode within the x86
// shift-group opcodes. Subcode 0x6 is intentionally absent — in the x86
// encoding it is an undocumented alias of shl (0x4).
// Note: the original list ended with a stray line-continuation backslash,
// silently splicing the following line into the macro; it has been removed.
#define SHIFT_INSTRUCTION_LIST(V) \
  V(rol, 0x0)                     \
  V(ror, 0x1)                     \
  V(rcl, 0x2)                     \
  V(rcr, 0x3)                     \
  V(shl, 0x4)                     \
  V(shr, 0x5)                     \
  V(sar, 0x7)
457
458
459class Assembler : public AssemblerBase {
460 private:
461  // We check before assembling an instruction that there is sufficient
462  // space to write an instruction and its relocation information.
463  // The relocation writer's position must be kGap bytes above the end of
464  // the generated instructions. This leaves enough space for the
465  // longest possible x64 instruction, 15 bytes, and the longest possible
466  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
467  // (There is a 15 byte limit on x64 instruction length that rules out some
468  // otherwise valid instructions.)
469  // This allows for a single, fast space check per instruction.
470  static const int kGap = 32;
471
472 public:
473  // Create an assembler. Instructions and relocation information are emitted
474  // into a buffer, with the instructions starting from the beginning and the
475  // relocation information starting from the end of the buffer. See CodeDesc
476  // for a detailed comment on the layout (globals.h).
477  //
478  // If the provided buffer is NULL, the assembler allocates and grows its own
479  // buffer, and buffer_size determines the initial buffer size. The buffer is
480  // owned by the assembler and deallocated upon destruction of the assembler.
481  //
482  // If the provided buffer is not NULL, the assembler uses the provided buffer
483  // for code generation and assumes its size to be buffer_size. If the buffer
484  // is too small, a fatal error occurs. No deallocation of the buffer is done
485  // upon destruction of the assembler.
486  Assembler(Isolate* isolate, void* buffer, int buffer_size);
487  virtual ~Assembler() { }
488
489  // GetCode emits any pending (non-emitted) code and fills the descriptor
490  // desc. GetCode() is idempotent; it returns the same result if no other
491  // Assembler functions are invoked in between GetCode() calls.
492  void GetCode(CodeDesc* desc);
493
494  // Read/Modify the code target in the relative branch/call instruction at pc.
495  // On the x64 architecture, we use relative jumps with a 32-bit displacement
496  // to jump to other Code objects in the Code space in the heap.
497  // Jumps to C functions are done indirectly through a 64-bit register holding
498  // the absolute address of the target.
499  // These functions convert between absolute Addresses of Code objects and
500  // the relative displacements stored in the code.
501  static inline Address target_address_at(Address pc, Address constant_pool);
502  static inline void set_target_address_at(
503      Isolate* isolate, Address pc, Address constant_pool, Address target,
504      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
  // Convenience overload: reads the code target at pc, using the constant
  // pool of |code| when one is provided (NULL constant pool otherwise).
  static inline Address target_address_at(Address pc, Code* code) {
    Address constant_pool = code ? code->constant_pool() : NULL;
    return target_address_at(pc, constant_pool);
  }
  // Convenience overload: patches the code target at pc, using the constant
  // pool of |code| when one is provided.
  static inline void set_target_address_at(
      Isolate* isolate, Address pc, Code* code, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) {
    Address constant_pool = code ? code->constant_pool() : NULL;
    set_target_address_at(isolate, pc, constant_pool, target,
                          icache_flush_mode);
  }
516
517  // Return the code target address at a call site from the return address
518  // of that call in the instruction stream.
519  static inline Address target_address_from_return_address(Address pc);
520
  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  // Used by the deserializer to fix up call/branch targets; simply forwards
  // to set_target_address_at.
  inline static void deserialization_set_special_target_at(
      Isolate* isolate, Address instruction_payload, Code* code,
      Address target) {
    set_target_address_at(isolate, instruction_payload, code, target);
  }
528
529  // This sets the internal reference at the pc.
530  inline static void deserialization_set_target_internal_reference_at(
531      Isolate* isolate, Address pc, Address target,
532      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
533
534  static inline RelocInfo::Mode RelocInfoNone() {
535    if (kPointerSize == kInt64Size) {
536      return RelocInfo::NONE64;
537    } else {
538      DCHECK(kPointerSize == kInt32Size);
539      return RelocInfo::NONE32;
540    }
541  }
542
543  inline Handle<Object> code_target_object_handle_at(Address pc);
544  inline Address runtime_entry_at(Address pc);
545  // Number of bytes taken up by the branch target in the code.
546  static const int kSpecialTargetSize = 4;  // Use 32-bit displacement.
547  // Distance between the address of the code target in the call instruction
548  // and the return address pushed on the stack.
549  static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
550  // The length of call(kScratchRegister).
551  static const int kCallScratchRegisterInstructionLength = 3;
552  // The length of call(Immediate32).
553  static const int kShortCallInstructionLength = 5;
554  // The length of movq(kScratchRegister, address).
555  static const int kMoveAddressIntoScratchRegisterInstructionLength =
556      2 + kPointerSize;
557  // The length of movq(kScratchRegister, address) and call(kScratchRegister).
558  static const int kCallSequenceLength =
559      kMoveAddressIntoScratchRegisterInstructionLength +
560      kCallScratchRegisterInstructionLength;
561
562  // The debug break slot must be able to contain an indirect call sequence.
563  static const int kDebugBreakSlotLength = kCallSequenceLength;
564  // Distance between start of patched debug break slot and the emitted address
565  // to jump to.
566  static const int kPatchDebugBreakSlotAddressOffset =
567      kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;
568
569  // One byte opcode for test eax,0xXXXXXXXX.
570  static const byte kTestEaxByte = 0xA9;
571  // One byte opcode for test al, 0xXX.
572  static const byte kTestAlByte = 0xA8;
573  // One byte opcode for nop.
574  static const byte kNopByte = 0x90;
575
576  // One byte prefix for a short conditional jump.
577  static const byte kJccShortPrefix = 0x70;
578  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
579  static const byte kJcShortOpcode = kJccShortPrefix | carry;
580  static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
581  static const byte kJzShortOpcode = kJccShortPrefix | zero;
582
583  // VEX prefix encodings.
584  enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
585  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
586  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
587  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
588
589  // ---------------------------------------------------------------------------
590  // Code generation
591  //
592  // Function names correspond one-to-one to x64 instruction mnemonics.
593  // Unless specified otherwise, instructions operate on 64-bit operands.
594  //
595  // If we need versions of an assembly instruction that operate on different
596  // width arguments, we add a single-letter suffix specifying the width.
597  // This is done for the following instructions: mov, cmp, inc, dec,
598  // add, sub, and test.
599  // There are no versions of these instructions without the suffix.
600  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
601  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
602  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
603  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
604  // - Instructions on operands/registers with pointer size use 'p'.
605
606  STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);
607
// Declares the pointer-size ('p'), 32-bit ('l') and 64-bit ('q') variants
// of each ASSEMBLER_INSTRUCTION_LIST instruction, for one, two, and three
// operands. Each variant forwards to the matching emit_* helper, passing
// the operand size as the final argument.
#define DECLARE_INSTRUCTION(instruction)                \
  template<class P1>                                    \
  void instruction##p(P1 p1) {                          \
    emit_##instruction(p1, kPointerSize);               \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##l(P1 p1) {                          \
    emit_##instruction(p1, kInt32Size);                 \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##q(P1 p1) {                          \
    emit_##instruction(p1, kInt64Size);                 \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##p(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kPointerSize);           \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##l(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt32Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##q(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt64Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##p(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kPointerSize);       \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##l(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt32Size);         \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##q(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt64Size);         \
  }
  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION
655
656  // Insert the smallest number of nop instructions
657  // possible to align the pc offset to a multiple
658  // of m, where m must be a power of 2.
659  void Align(int m);
660  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
662  void DataAlign(int m);
663  void Nop(int bytes = 1);
664  // Aligns code to something that's optimal for a jump target for the platform.
665  void CodeTargetAlign();
666
667  // Stack
668  void pushfq();
669  void popfq();
670
671  void pushq(Immediate value);
672  // Push a 32 bit integer, and guarantee that it is actually pushed as a
673  // 32 bit value, the normal push will optimize the 8 bit case.
674  void pushq_imm32(int32_t imm32);
675  void pushq(Register src);
676  void pushq(const Operand& src);
677
678  void popq(Register dst);
679  void popq(const Operand& dst);
680
681  void enter(Immediate size);
682  void leave();
683
684  // Moves
685  void movb(Register dst, const Operand& src);
686  void movb(Register dst, Immediate imm);
687  void movb(const Operand& dst, Register src);
688  void movb(const Operand& dst, Immediate imm);
689
690  // Move the low 16 bits of a 64-bit register value to a 16-bit
691  // memory location.
692  void movw(Register dst, const Operand& src);
693  void movw(const Operand& dst, Register src);
694  void movw(const Operand& dst, Immediate imm);
695
696  // Move the offset of the label location relative to the current
697  // position (after the move) to the destination.
698  void movl(const Operand& dst, Label* src);
699
700  // Loads a pointer into a register with a relocation mode.
701  void movp(Register dst, void* ptr, RelocInfo::Mode rmode);
702
703  // Loads a 64-bit immediate into a register.
704  void movq(Register dst, int64_t value,
705            RelocInfo::Mode rmode = RelocInfo::NONE64);
706  void movq(Register dst, uint64_t value,
707            RelocInfo::Mode rmode = RelocInfo::NONE64);
708
709  void movsxbl(Register dst, Register src);
710  void movsxbl(Register dst, const Operand& src);
711  void movsxbq(Register dst, Register src);
712  void movsxbq(Register dst, const Operand& src);
713  void movsxwl(Register dst, Register src);
714  void movsxwl(Register dst, const Operand& src);
715  void movsxwq(Register dst, Register src);
716  void movsxwq(Register dst, const Operand& src);
717  void movsxlq(Register dst, Register src);
718  void movsxlq(Register dst, const Operand& src);
719
720  // Repeated moves.
721
722  void repmovsb();
723  void repmovsw();
724  void repmovsp() { emit_repmovs(kPointerSize); }
725  void repmovsl() { emit_repmovs(kInt32Size); }
726  void repmovsq() { emit_repmovs(kInt64Size); }
727
728  // Instruction to load from an immediate 64-bit pointer into RAX.
729  void load_rax(void* ptr, RelocInfo::Mode rmode);
730  void load_rax(ExternalReference ext);
731
732  // Conditional moves.
733  void cmovq(Condition cc, Register dst, Register src);
734  void cmovq(Condition cc, Register dst, const Operand& src);
735  void cmovl(Condition cc, Register dst, Register src);
736  void cmovl(Condition cc, Register dst, const Operand& src);
737
  // Compares 8-bit operands. The 0x7 passed to the immediate helpers is
  // presumably the cmp subcode of the x86 immediate arithmetic group —
  // confirm in immediate_arithmetic_op_8/_16.
  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  // Short-form compare of al against an 8-bit immediate.
  void cmpb_al(Immediate src);

  // cmp r8, r/m8 (opcode 0x3A).
  void cmpb(Register dst, Register src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  void cmpb(Register dst, const Operand& src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  // cmp r/m8, r8 (opcode 0x38) — note the swapped argument order passed to
  // the emit helper.
  void cmpb(const Operand& dst, Register src) {
    arithmetic_op_8(0x38, src, dst);
  }

  void cmpb(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  // 16-bit compares; same subcode/opcode scheme as the 8-bit forms above.
  void cmpw(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, const Operand& src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(Register dst, Register src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(const Operand& dst, Register src) {
    arithmetic_op_16(0x39, src, dst);
  }
779
  // test is commutative, so the (Register, Operand) forms simply delegate
  // to the (Operand, Register) overloads declared elsewhere in this class.
  void testb(Register reg, const Operand& op) { testb(op, reg); }

  void testw(Register reg, const Operand& op) { testw(op, reg); }

  // and of an 8-bit register with an immediate (subcode 0x4 — presumably
  // the and extension of the immediate arithmetic group; confirm in
  // immediate_arithmetic_op_8).
  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);
  }
787
788  void decb(Register dst);
789  void decb(const Operand& dst);
790
791  // Lock prefix.
792  void lock();
793
794  void xchgb(Register reg, const Operand& op);
795  void xchgw(Register reg, const Operand& op);
796
797  void cmpxchgb(const Operand& dst, Register src);
798  void cmpxchgw(const Operand& dst, Register src);
799
800  // Sign-extends rax into rdx:rax.
801  void cqo();
802  // Sign-extends eax into edx:eax.
803  void cdq();
804
805  // Multiply eax by src, put the result in edx:eax.
806  void mull(Register src);
807  void mull(const Operand& src);
808  // Multiply rax by src, put the result in rdx:rax.
809  void mulq(Register src);
810
  // Declares the complete shift family for one mnemonic: immediate-count
  // and cl-count forms, each for register and memory destinations, at
  // pointer-sized (p), 32-bit (l) and 64-bit (q) operand widths.
  // 'subcode' selects the shift operation in the shared shift() emitter.
  // (No comments inside the macro body: a '//' would swallow the trailing
  // line-continuation backslash.)
#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode)                       \
  void instruction##p(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kPointerSize);                                  \
  }                                                                           \
                                                                              \
  void instruction##l(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kInt32Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##q(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kInt64Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##p(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kPointerSize);                                  \
  }                                                                           \
                                                                              \
  void instruction##l(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kInt32Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##q(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kInt64Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \
                                                                              \
  void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); }   \
                                                                              \
  void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); }   \
                                                                              \
  void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); }  \
                                                                              \
  void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); }    \
                                                                              \
  void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
  // Instantiate the family for every mnemonic in SHIFT_INSTRUCTION_LIST.
  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
#undef DECLARE_SHIFT_INSTRUCTION
849
  // Shifts dst:src left by cl bits, affecting only dst.
  void shld(Register dst, Register src);

  // Shifts src:dst right by cl bits, affecting only dst.
  void shrd(Register dst, Register src);

  // Stores rax at the given absolute address / external reference.
  void store_rax(void* dst, RelocInfo::Mode mode);
  void store_rax(ExternalReference ref);

  // 8-bit subtract of an immediate, via the shared emitter (subcode 0x5).
  void subb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x5, dst, src);
  }

  // 8-bit TEST: ANDs the operands to set flags, discards the result.
  void testb(Register dst, Register src);
  void testb(Register reg, Immediate mask);
  void testb(const Operand& op, Immediate mask);
  void testb(const Operand& op, Register reg);

  // 16-bit TEST.
  void testw(Register dst, Register src);
  void testw(Register reg, Immediate mask);
  void testw(const Operand& op, Immediate mask);
  void testw(const Operand& op, Register reg);

  // Bit operations.
  void bt(const Operand& dst, Register src);
  void bts(const Operand& dst, Register src);
  // Bit-scan reverse/forward, 64-bit (q) and 32-bit (l) variants.
  void bsrq(Register dst, Register src);
  void bsrq(Register dst, const Operand& src);
  void bsrl(Register dst, Register src);
  void bsrl(Register dst, const Operand& src);
  void bsfq(Register dst, Register src);
  void bsfq(Register dst, const Operand& src);
  void bsfl(Register dst, Register src);
  void bsfl(Register dst, const Operand& src);

  // Miscellaneous
  void clc();
  void cld();
  void cpuid();
  void hlt();
  void int3();
  void nop();
  // Near return, popping imm16 extra bytes off the stack (0 for plain ret).
  void ret(int imm16);
  void ud2();
  // Sets the low byte of reg to 1 if condition cc holds, else 0.
  void setcc(Condition cc, Register reg);
895
  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);
  void call(Handle<Code> target,
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
            TypeFeedbackId ast_id = TypeFeedbackId::None());

  // Calls directly to the given address using a relative offset.
  // Should only ever be used in Code objects for calls within the
  // same Code object. Should not be used when generating new code (use labels),
  // but only when patching existing code.
  void call(Address target);

  // Call near absolute indirect, address in register
  void call(Register adr);

  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Address entry, RelocInfo::Mode rmode);
  void jmp(Handle<Code> target, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
  void jmp(const Operand& src);

  // Conditional jumps to label, absolute address, or code object.
  void j(Condition cc,
         Label* L,
         Label::Distance distance = Label::kFar);
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
948
  // Floating-point operations (legacy x87 FPU; operate on the st(i)
  // register stack rather than on XMM registers).
  void fld(int i);

  // Push common constants onto the FPU stack: 1.0, +0.0, pi, ln(2).
  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  // Load a single/double-precision value from memory onto the stack.
  void fld_s(const Operand& adr);
  void fld_d(const Operand& adr);

  // Store st(0) to memory and pop; fstp(index) pops into st(index).
  void fstp_s(const Operand& adr);
  void fstp_d(const Operand& adr);
  void fstp(int index);

  // Load a 32/64-bit integer from memory, converted to floating point.
  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);

  void fist_s(const Operand& adr);

  // Store st(0) as an integer and pop; fisttp_* truncate (SSE3).
  void fistp_s(const Operand& adr);
  void fistp_d(const Operand& adr);

  void fisttp_s(const Operand& adr);
  void fisttp_d(const Operand& adr);

  void fabs();
  void fchs();

  // Arithmetic between st(0) and st(i).
  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(const Operand& adr);

  // Arithmetic-and-pop forms; default operates on st(1), st(0).
  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  // Compares; the fucomi/fucomip forms set EFLAGS directly.
  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  // Transcendental / scaling operations on st(0) (and st(1) where needed).
  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void frndint();

  void sahf();
1019
  // SSE instructions
  // Scalar single-precision arithmetic (operate on the low float lane).
  void addss(XMMRegister dst, XMMRegister src);
  void addss(XMMRegister dst, const Operand& src);
  void subss(XMMRegister dst, XMMRegister src);
  void subss(XMMRegister dst, const Operand& src);
  void mulss(XMMRegister dst, XMMRegister src);
  void mulss(XMMRegister dst, const Operand& src);
  void divss(XMMRegister dst, XMMRegister src);
  void divss(XMMRegister dst, const Operand& src);

  void maxss(XMMRegister dst, XMMRegister src);
  void maxss(XMMRegister dst, const Operand& src);
  void minss(XMMRegister dst, XMMRegister src);
  void minss(XMMRegister dst, const Operand& src);

  void sqrtss(XMMRegister dst, XMMRegister src);
  void sqrtss(XMMRegister dst, const Operand& src);

  // Unordered scalar compare; sets EFLAGS.
  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, const Operand& src);
  void movaps(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, const Operand& src);
  void movss(const Operand& dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  // Truncating float -> int32 conversion, and int32 -> float.
  void cvttss2si(Register dst, const Operand& src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, const Operand& src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  // Packed single-precision bitwise logic.
  void andps(XMMRegister dst, XMMRegister src);
  void andps(XMMRegister dst, const Operand& src);
  void orps(XMMRegister dst, XMMRegister src);
  void orps(XMMRegister dst, const Operand& src);
  void xorps(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, const Operand& src);

  // Packed single-precision arithmetic (four float lanes).
  void addps(XMMRegister dst, XMMRegister src);
  void addps(XMMRegister dst, const Operand& src);
  void subps(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, const Operand& src);
  void mulps(XMMRegister dst, XMMRegister src);
  void mulps(XMMRegister dst, const Operand& src);
  void divps(XMMRegister dst, XMMRegister src);
  void divps(XMMRegister dst, const Operand& src);

  // Extracts the sign bits of the four float lanes into dst.
  void movmskps(Register dst, XMMRegister src);
1074
  // SSE2 instructions
  // 32-bit (movd) and 64-bit (movq) moves between GP and XMM registers.
  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, const Operand& src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(const Operand& dst, XMMRegister src);
  void movsd(XMMRegister dst, const Operand& src);

  // 128-bit integer loads/stores: aligned (movdqa) and unaligned (movdqu).
  void movdqa(const Operand& dst, XMMRegister src);
  void movdqa(XMMRegister dst, const Operand& src);

  void movdqu(const Operand& dst, XMMRegister src);
  void movdqu(XMMRegister dst, const Operand& src);

  void movapd(XMMRegister dst, XMMRegister src);
  void movupd(XMMRegister dst, const Operand& src);
  void movupd(const Operand& dst, XMMRegister src);

  // Packed integer shifts by an immediate bit count (64/32-bit lanes).
  void psllq(XMMRegister reg, byte imm8);
  void psrlq(XMMRegister reg, byte imm8);
  void pslld(XMMRegister reg, byte imm8);
  void psrld(XMMRegister reg, byte imm8);

  // Truncating scalar float/double -> int32 (l) / int64 (q) conversions.
  void cvttsd2si(Register dst, const Operand& src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, const Operand& src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, const Operand& src);

  // int32 (l) / int64 (q) -> scalar float/double conversions.
  void cvtlsi2sd(XMMRegister dst, const Operand& src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, const Operand& src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, const Operand& src);
  void cvtqsi2sd(XMMRegister dst, Register src);


  // Scalar float <-> double conversions.
  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
  void cvtsd2ss(XMMRegister dst, const Operand& src);

  // Rounding (non-truncating) double -> int32/int64 conversions.
  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  // Scalar double-precision arithmetic.
  void addsd(XMMRegister dst, XMMRegister src);
  void addsd(XMMRegister dst, const Operand& src);
  void subsd(XMMRegister dst, XMMRegister src);
  void subsd(XMMRegister dst, const Operand& src);
  void mulsd(XMMRegister dst, XMMRegister src);
  void mulsd(XMMRegister dst, const Operand& src);
  void divsd(XMMRegister dst, XMMRegister src);
  void divsd(XMMRegister dst, const Operand& src);

  void maxsd(XMMRegister dst, XMMRegister src);
  void maxsd(XMMRegister dst, const Operand& src);
  void minsd(XMMRegister dst, XMMRegister src);
  void minsd(XMMRegister dst, const Operand& src);

  // Packed double-precision bitwise logic and scalar square root.
  void andpd(XMMRegister dst, XMMRegister src);
  void andpd(XMMRegister dst, const Operand& src);
  void orpd(XMMRegister dst, XMMRegister src);
  void orpd(XMMRegister dst, const Operand& src);
  void xorpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, const Operand& src);
  void sqrtsd(XMMRegister dst, XMMRegister src);
  void sqrtsd(XMMRegister dst, const Operand& src);

  // Unordered scalar double compare; sets EFLAGS.
  void ucomisd(XMMRegister dst, XMMRegister src);
  void ucomisd(XMMRegister dst, const Operand& src);
  void cmpltsd(XMMRegister dst, XMMRegister src);
  void pcmpeqd(XMMRegister dst, XMMRegister src);

  // Extracts the sign bits of the two double lanes into dst.
  void movmskpd(Register dst, XMMRegister src);

  // Interleave the low/high 32-bit lanes of dst and src.
  void punpckldq(XMMRegister dst, XMMRegister src);
  void punpckldq(XMMRegister dst, const Operand& src);
  void punpckhdq(XMMRegister dst, XMMRegister src);
1165
  // SSE 4.1 instruction
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);
  // Extract/insert a 32-bit lane selected by imm8.
  void pextrd(Register dst, XMMRegister src, int8_t imm8);
  void pinsrd(XMMRegister dst, Register src, int8_t imm8);
  void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);

  // Scalar rounding with an explicit rounding mode.
  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // Packed compares; 'cmp' is the immediate predicate byte.
  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpps(XMMRegister dst, const Operand& src, int8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmppd(XMMRegister dst, const Operand& src, int8_t cmp);

  // Expands to named wrappers (e.g. cmpeqps) around cmpps/cmppd with a
  // fixed predicate immediate.
#define SSE_CMP_P(instr, imm8)                                                \
  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
  void instr##ps(XMMRegister dst, const Operand& src) {                       \
    cmpps(dst, src, imm8);                                                    \
  }                                                                           \
  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, const Operand& src) { cmppd(dst, src, imm8); }

  SSE_CMP_P(cmpeq, 0x0);
  SSE_CMP_P(cmplt, 0x1);
  SSE_CMP_P(cmple, 0x2);
  SSE_CMP_P(cmpneq, 0x4);
  SSE_CMP_P(cmpnlt, 0x5);
  SSE_CMP_P(cmpnle, 0x6);

#undef SSE_CMP_P

  // Packed single-precision min/max, reciprocal estimates, square roots,
  // and unaligned moves.
  void minps(XMMRegister dst, XMMRegister src);
  void minps(XMMRegister dst, const Operand& src);
  void maxps(XMMRegister dst, XMMRegister src);
  void maxps(XMMRegister dst, const Operand& src);
  void rcpps(XMMRegister dst, XMMRegister src);
  void rcpps(XMMRegister dst, const Operand& src);
  void rsqrtps(XMMRegister dst, XMMRegister src);
  void rsqrtps(XMMRegister dst, const Operand& src);
  void sqrtps(XMMRegister dst, XMMRegister src);
  void sqrtps(XMMRegister dst, const Operand& src);
  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, const Operand& src);
  void movups(const Operand& dst, XMMRegister src);
  // Packed 32-bit integer arithmetic and shuffles.
  void paddd(XMMRegister dst, XMMRegister src);
  void paddd(XMMRegister dst, const Operand& src);
  void psubd(XMMRegister dst, XMMRegister src);
  void psubd(XMMRegister dst, const Operand& src);
  void pmulld(XMMRegister dst, XMMRegister src);
  void pmulld(XMMRegister dst, const Operand& src);
  void pmuludq(XMMRegister dst, XMMRegister src);
  void pmuludq(XMMRegister dst, const Operand& src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  // Packed float <-> int32 conversions.
  void cvtps2dq(XMMRegister dst, XMMRegister src);
  void cvtps2dq(XMMRegister dst, const Operand& src);
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void cvtdq2ps(XMMRegister dst, const Operand& src);
1225
  // AVX instruction
  // FMA3 scalar-double wrappers. All forward to vfmasd() with the FMA
  // opcode byte; the 132/213/231 suffix encodes which operands are
  // multiplied and which is added (per the FMA3 naming scheme).
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  // Shared FMA-sd emitters taking the raw opcode byte.
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1301
  // FMA3 scalar-single wrappers; mirrors the scalar-double family above
  // but dispatches to vfmass() with the same opcode bytes.
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  // Shared FMA-ss emitters taking the raw opcode byte.
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1376
  // VEX-encoded 32-bit (vmovd) and 64-bit (vmovq) GP<->XMM moves.
  void vmovd(XMMRegister dst, Register src);
  void vmovd(XMMRegister dst, const Operand& src);
  void vmovd(Register dst, XMMRegister src);
  void vmovq(XMMRegister dst, Register src);
  void vmovq(XMMRegister dst, const Operand& src);
  void vmovq(Register dst, XMMRegister src);

  // VEX scalar-double moves; the memory forms pass xmm0 as the unused
  // vvvv operand (opcode 0x10 = load, 0x11 = store).
  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x10, dst, src1, src2);
  }
  void vmovsd(XMMRegister dst, const Operand& src) {
    vsd(0x10, dst, xmm0, src);
  }
  void vmovsd(const Operand& dst, XMMRegister src) {
    vsd(0x11, src, xmm0, dst);
  }
1393
  // Macro family declaring three-operand AVX wrappers: AVX_3 emits one
  // register and one memory overload routed through the given emitter
  // (vss/vsd/vps/vpd); AVX_S_3/AVX_P_3/AVX_SP_3 fan a mnemonic out over
  // the scalar and/or packed single/double variants with one opcode.
#define AVX_SP_3(instr, opcode) \
  AVX_S_3(instr, opcode)        \
  AVX_P_3(instr, opcode)

#define AVX_S_3(instr, opcode)  \
  AVX_3(instr##ss, opcode, vss) \
  AVX_3(instr##sd, opcode, vsd)

#define AVX_P_3(instr, opcode)  \
  AVX_3(instr##ps, opcode, vps) \
  AVX_3(instr##pd, opcode, vpd)

#define AVX_3(instr, opcode, impl)                                     \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    impl(opcode, dst, src1, src2);                                     \
  }                                                                    \
  void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    impl(opcode, dst, src1, src2);                                     \
  }

  AVX_SP_3(vsqrt, 0x51);
  AVX_SP_3(vadd, 0x58);
  AVX_SP_3(vsub, 0x5c);
  AVX_SP_3(vmul, 0x59);
  AVX_SP_3(vdiv, 0x5e);
  AVX_SP_3(vmin, 0x5d);
  AVX_SP_3(vmax, 0x5f);
  AVX_P_3(vand, 0x54);
  AVX_P_3(vor, 0x56);
  AVX_P_3(vxor, 0x57);
  AVX_3(vpcmpeqd, 0x76, vpd);
  AVX_3(vcvtsd2ss, 0x5a, vsd);

#undef AVX_3
#undef AVX_S_3
#undef AVX_P_3
#undef AVX_SP_3
1431
  // VEX shift-by-immediate: opcode 0x73 with the sub-opcode (2 = shift
  // right, 6 = shift left) encoded in the register field via a synthetic
  // XMMRegister, followed by the count byte.
  void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {2};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {6};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  // VEX scalar conversions. The (prefix, leading-opcode, W) triple selects
  // the exact encoding: kF3/kF2 distinguish single/double, kW0/kW1
  // distinguish 32-bit (l) from 64-bit (q) integer operands.
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  // The GP-register source/destination is wrapped in a synthetic
  // XMMRegister carrying the same encoding code, as vsd() expects.
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  // Truncating (0x2c) and rounding (0x2d) float/double -> integer.
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  // VEX unordered scalar-double compare; sets EFLAGS.
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, const Operand& src) {
    vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  // VEX scalar rounding; the immediate combines the rounding mode with
  // bit 3 set to mask the precision exception.
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vsd(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vsd(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }

  // Short forms default to the common scalar-double encoding
  // (F2 prefix, 0F leading opcode, W ignored).
  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
           SIMDPrefix pp, LeadingOpcode m, VexW w);
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
           SIMDPrefix pp, LeadingOpcode m, VexW w);

  // VEX scalar-single moves, mirroring vmovsd above.
  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, const Operand& src) {
    vss(0x10, dst, xmm0, src);
  }
  void vmovss(const Operand& dst, XMMRegister src) {
    vss(0x11, src, xmm0, dst);
  }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, const Operand& src);
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1553
1554  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
1555  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
1556  void vmovups(XMMRegister dst, const Operand& src) {
1557    vps(0x10, dst, xmm0, src);
1558  }
1559  void vmovups(const Operand& dst, XMMRegister src) {
1560    vps(0x11, src, xmm0, dst);
1561  }
1562  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
1563  void vmovupd(XMMRegister dst, const Operand& src) {
1564    vpd(0x10, dst, xmm0, src);
1565  }
1566  void vmovupd(const Operand& dst, XMMRegister src) {
1567    vpd(0x11, src, xmm0, dst);
1568  }
  void vmovmskps(Register dst, XMMRegister src) {
    // Reinterpret the general-purpose register's code as an XMM register so
    // the shared vps helper can place it in the ModRM reg field.
    XMMRegister idst = {dst.code()};
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vpd(0x50, idst, xmm0, src);
  }
  // Packed compares; the trailing byte is the comparison predicate immediate.
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
1595
// Defines the named-predicate compare mnemonics (e.g. vcmpeqps, vcmpltpd) in
// terms of vcmpps/vcmppd with a fixed predicate immediate.
#define AVX_CMP_P(instr, imm8)                                             \
  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmppd(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  // Predicate immediates: 0 = EQ, 1 = LT, 2 = LE, 4 = NEQ, 5 = NLT, 6 = NLE.
  AVX_CMP_P(vcmpeq, 0x0);
  AVX_CMP_P(vcmplt, 0x1);
  AVX_CMP_P(vcmple, 0x2);
  AVX_CMP_P(vcmpneq, 0x4);
  AVX_CMP_P(vcmpnlt, 0x5);
  AVX_CMP_P(vcmpnle, 0x6);

#undef AVX_CMP_P
1618
  // Shared emitters for VEX-encoded packed-single (vps) and packed-double
  // (vpd) instructions: VEX prefix + opcode + operands.
  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1623
  // BMI1 instructions (VEX-encoded; emitted via the bmi1q/bmi1l helpers,
  // where the second register argument lands in the VEX.vvvv field).
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, const Operand& src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, const Operand& src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  // BEXTR encodes the control operand (src2) in VEX.vvvv, hence the swapped
  // argument order in the helper calls.
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, const Operand& src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, const Operand& src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  // BLSI/BLSMSK/BLSR share opcode 0xf3 and are distinguished by an opcode
  // extension (/3, /2, /1) passed in the ModRM reg position; the real
  // destination goes in the vvvv slot.
  void blsiq(Register dst, Register src) {
    Register ireg = {3};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsiq(Register dst, const Operand& src) {
    Register ireg = {3};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsil(Register dst, Register src) {
    Register ireg = {3};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsil(Register dst, const Operand& src) {
    Register ireg = {3};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsmskq(Register dst, Register src) {
    Register ireg = {2};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsmskq(Register dst, const Operand& src) {
    Register ireg = {2};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsmskl(Register dst, Register src) {
    Register ireg = {2};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsmskl(Register dst, const Operand& src) {
    Register ireg = {2};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsrq(Register dst, Register src) {
    Register ireg = {1};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsrq(Register dst, const Operand& src) {
    Register ireg = {1};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsrl(Register dst, Register src) {
    Register ireg = {1};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsrl(Register dst, const Operand& src) {
    Register ireg = {1};
    bmi1l(0xf3, ireg, dst, src);
  }
  // Count trailing zero bits (64/32-bit forms).
  void tzcntq(Register dst, Register src);
  void tzcntq(Register dst, const Operand& src);
  void tzcntl(Register dst, Register src);
  void tzcntl(Register dst, const Operand& src);

  // Count leading zero bits (64/32-bit forms).
  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, const Operand& src);
  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, const Operand& src);

  // Count set bits (64/32-bit forms).
  void popcntq(Register dst, Register src);
  void popcntq(Register dst, const Operand& src);
  void popcntl(Register dst, Register src);
  void popcntl(Register dst, const Operand& src);
1711
  // BMI2 instructions (VEX-encoded via bmi2q/bmi2l; the SIMDPrefix selects
  // the mandatory prefix byte).  For BZHI/SARX/SHLX/SHRX the shift/index
  // operand (src2) is encoded in VEX.vvvv, hence the swapped argument order.
  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, const Operand& src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  // Unsigned multiply of the implicit rdx/edx by src; dst1 gets the high
  // half, dst2 (in vvvv) the low half.
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, const Operand& src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, const Operand& src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  // Parallel bit deposit/extract.
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  // Non-flag-setting shifts: same opcode 0xf7, selected by the prefix
  // (kF3 = SARX, k66 = SHLX, kF2 = SHRX).
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  // Rotate right by an immediate count without touching flags.
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, const Operand& src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, const Operand& src, byte imm8);
1800
  // Check the code size generated from label to here.
  // Returns the number of bytes emitted since the (already bound) label.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }
1805
  // Mark generator continuation.
  void RecordGeneratorContinuation();

  // Mark address of a debug break slot.
  void RecordDebugBreakSlot(RelocInfo::Mode mode);

  // Record a comment relocation entry that can be used by a disassembler.
  // Use --code-comments to enable.
  void RecordComment(const char* msg);

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, int raw_position, int id);

  void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
                                          ConstantPoolEntry::Access access,
                                          ConstantPoolEntry::Type type) {
    // No embedded constant pool support.
    UNREACHABLE();
  }

  // Writes a single word of data in the code stream.
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
  void dq(uint64_t data);
  void dp(uintptr_t data) { dq(data); }
  // Writes the (possibly not yet known) address of the label's target.
  void dq(Label* label);
1834
  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.  Relocation info grows
  // downward from the end of the buffer toward pc_.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512*MB;

  // Raw access to single bytes of the code buffer.
  byte byte_at(int pos)  { return buffer_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
1854
 protected:
  // Call near indirect (FF /2 with a memory/register operand).
  void call(const Operand& operand);
1858
 private:
  // Raw buffer access helpers.
  byte* addr_at(int pos)  { return buffer_ + pos; }
  uint32_t long_at(int pos)  {
    return *reinterpret_cast<uint32_t*>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x)  {
    *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
  }

  // code emission
  void GrowBuffer();

  // Append a single byte at the current position.  Callers must have
  // ensured there is space (see EnsureSpace).
  void emit(byte x) { *pc_++ = x; }
  inline void emitl(uint32_t x);
  inline void emitp(void* x, RelocInfo::Mode rmode);
  inline void emitq(uint64_t x);
  inline void emitw(uint16_t x);
  inline void emit_code_target(Handle<Code> target,
                               RelocInfo::Mode rmode,
                               TypeFeedbackId ast_id = TypeFeedbackId::None());
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
  // Emit a 32-bit immediate, recording relocation info first if needed.
  void emit(Immediate x) {
    if (!RelocInfo::IsNone(x.rmode_)) {
      RecordRelocInfo(x.rmode_);
    }
    emitl(x.value_);
  }
1886
  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is set.
  inline void emit_rex_64(Register reg, const Operand& op);
  inline void emit_rex_64(XMMRegister reg, const Operand& op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(const Operand& op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.
  inline void emit_rex_32(Register reg, const Operand& op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(const Operand& op);

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, const Operand& op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM registers.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, const Operand&), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(const Operand&) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(const Operand& op);

  // Size-dispatching helpers: kInt64Size emits a mandatory REX.W prefix,
  // kInt32Size emits a REX prefix only if an extended register requires one.
  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK(size == kInt32Size);
    }
  }

  template<class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1);
    }
  }

  template<class P1, class P2>
  void emit_rex(P1 p1, P2 p2, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1, p2);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1, p2);
    }
  }
1998
  // Emit vex prefix.  The two-byte form (0xc5) suffices when neither REX.X,
  // REX.B, REX.W nor a non-0F leading opcode is needed; otherwise the
  // three-byte form (0xc4) is used.
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
  inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm,
                              LeadingOpcode m);
  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  // Emit a complete (two- or three-byte) VEX prefix for the given operands.
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Register rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
2021
  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, const Operand& adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, const Operand& adr);

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  // 0xC0 selects register-direct addressing (mod = 0b11).
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // The first argument is the reg field, the second argument is the r/m field.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, const Operand& adr);
  void emit_sse_operand(Register reg, const Operand& adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst);
2057
  // Emit machine code for one of the operations ADD, ADC, SUB, SBC,
  // AND, OR, XOR, or CMP.  The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
  void arithmetic_op(byte opcode,
                     Register reg,
                     const Operand& rm_reg,
                     int size);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode,
                                 Register dst,
                                 Immediate src);
  void immediate_arithmetic_op_8(byte subcode,
                                 const Operand& dst,
                                 Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_16(byte subcode,
                                  const Operand& dst,
                                  Immediate src);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void immediate_arithmetic_op(byte subcode,
                               Register dst,
                               Immediate src,
                               int size);
  void immediate_arithmetic_op(byte subcode,
                               const Operand& dst,
                               Immediate src,
                               int size);

  // Emit machine code for a shift operation.
  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
  void shift(Register dst, Immediate shift_amount, int subcode, int size);
  // Shift dst by cl % 64 bits.
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);

  // Emit an x87 instruction: two opcode bytes with i folded into the second.
  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
2111
  // Arithmetics.  Odd opcodes (0x03, 0x23, ...) take reg, r/m operand order;
  // the store-to-memory forms use the r/m, reg opcode instead.
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, const Operand& src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(const Operand& dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, const Operand& src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(const Operand& dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, const Operand& src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }
2172
  // Compare {al,ax,eax,rax} with the value at dst.  If equal, set ZF and
  // write src into dst.  Otherwise clear ZF and load the value at dst into
  // {al,ax,eax,rax}.  This operation is only atomic if prefixed by the lock
  // instruction.
  void emit_cmpxchg(const Operand& dst, Register src, int size);
2177
  // Decrement/increment by one.
  void emit_dec(Register dst, int size);
  void emit_dec(const Operand& dst, int size);

  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  void emit_div(Register src, int size);

  // Signed multiply instructions.
  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
  void emit_imul(Register src, int size);
  void emit_imul(const Operand& src, int size);
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, const Operand& src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, const Operand& src, Immediate imm, int size);

  void emit_inc(Register dst, int size);
  void emit_inc(const Operand& dst, int size);

  // Load effective address of src into dst.
  void emit_lea(Register dst, const Operand& src, int size);

  void emit_mov(Register dst, const Operand& src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(const Operand& dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(const Operand& dst, Immediate value, int size);

  // Zero-extending byte/word loads.
  void emit_movzxb(Register dst, const Operand& src, int size);
  void emit_movzxb(Register dst, Register src, int size);
  void emit_movzxw(Register dst, const Operand& src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  void emit_neg(Register dst, int size);
  void emit_neg(const Operand& dst, int size);

  void emit_not(Register dst, int size);
  void emit_not(const Operand& dst, int size);
2217
  // Bitwise OR; same opcode-direction convention as emit_add above.
  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, const Operand& src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(const Operand& dst, Register src, int size) {
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  // rep movs with the given operand size.
  void emit_repmovs(int size);

  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, const Operand& src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(const Operand& dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }
2263
  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(const Operand& op, Register reg, int size);
  void emit_test(const Operand& op, Immediate mask, int size);
  // TEST is symmetric; forward to the (op, reg) form.
  void emit_test(Register reg, const Operand& op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, const Operand& src, int size);

  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore
      // there is no need to make this a 64 bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }

  void emit_xor(Register dst, const Operand& src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }
2300
2301  // Most BMI instructions are similiar.
2302  void bmi1q(byte op, Register reg, Register vreg, Register rm);
2303  void bmi1q(byte op, Register reg, Register vreg, const Operand& rm);
2304  void bmi1l(byte op, Register reg, Register vreg, Register rm);
2305  void bmi1l(byte op, Register reg, Register vreg, const Operand& rm);
2306  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2307  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
2308             const Operand& rm);
2309  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2310  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
2311             const Operand& rm);
2312
  friend class CodePatcher;
  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // code generation
  // Writes relocation entries backward from the end of the code buffer.
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // Code objects referenced by emitted code-target relocations.
  List< Handle<Code> > code_targets_;
2326};
2327
2328
// Helper class that ensures that there is enough space for generating
// instructions and relocation information.  The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace BASE_EMBEDDED {
 public:
  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    // Grow eagerly so the caller may emit up to kGap bytes unchecked.
    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    // Verify the scope emitted strictly less than the guaranteed kGap bytes.
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* assembler_;
#ifdef DEBUG
  // Free buffer bytes at construction time (debug-only bookkeeping).
  int space_before_;
#endif
};
2355
2356}  // namespace internal
2357}  // namespace v8
2358
2359#endif  // V8_X64_ASSEMBLER_X64_H_
2360