1// Copyright (c) 1994-2006 Sun Microsystems Inc.
2// All Rights Reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// - Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10//
11// - Redistribution in binary form must reproduce the above copyright
12// notice, this list of conditions and the following disclaimer in the
13// documentation and/or other materials provided with the distribution.
14//
15// - Neither the name of Sun Microsystems or the names of contributors may
16// be used to endorse or promote products derived from this software without
17// specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// The original source code covered by the above license above has been
32// modified significantly by Google Inc.
33// Copyright 2012 the V8 project authors. All rights reserved.
34
35// A lightweight X64 Assembler.
36
37#ifndef V8_X64_ASSEMBLER_X64_H_
38#define V8_X64_ASSEMBLER_X64_H_
39
40#include "src/serialize.h"
41
42namespace v8 {
43namespace internal {
44
45// Utility functions
46
47// CPU Registers.
48//
49// 1) We would prefer to use an enum, but enum values are assignment-
50// compatible with int, which has caused code-generation bugs.
51//
52// 2) We would prefer to use a class instead of a struct but we don't like
53// the register initialization to depend on the particular initialization
54// order (which appears to be different on OS X, Linux, and Windows for the
55// installed versions of C++ we tried). Using a struct permits C-style
56// "initialization". Also, the Register objects cannot be const as this
57// forces initialization stubs in MSVC, making us dependent on initialization
58// order.
59//
60// 3) By not using an enum, we are possibly preventing the compiler from
61// doing certain constant folds, which may significantly reduce the
62// code generated for some assembly instructions (because they boil down
63// to a few constants). If this is a problem, we could change the code
64// such that we use an enum in optimized mode, and the struct in debug
65// mode. This way we get the compile-time error checking in debug mode
66// and best performance in optimized code.
67//
68
69struct Register {
70  // The non-allocatable registers are:
71  //  rsp - stack pointer
72  //  rbp - frame pointer
73  //  r10 - fixed scratch register
74  //  r12 - smi constant register
75  //  r13 - root register
76  static const int kMaxNumAllocatableRegisters = 11;
77  static int NumAllocatableRegisters() {
78    return kMaxNumAllocatableRegisters;
79  }
80  static const int kNumRegisters = 16;
81
82  static int ToAllocationIndex(Register reg) {
83    return kAllocationIndexByRegisterCode[reg.code()];
84  }
85
86  static Register FromAllocationIndex(int index) {
87    DCHECK(index >= 0 && index < kMaxNumAllocatableRegisters);
88    Register result = { kRegisterCodeByAllocationIndex[index] };
89    return result;
90  }
91
92  static const char* AllocationIndexToString(int index) {
93    DCHECK(index >= 0 && index < kMaxNumAllocatableRegisters);
94    const char* const names[] = {
95      "rax",
96      "rbx",
97      "rdx",
98      "rcx",
99      "rsi",
100      "rdi",
101      "r8",
102      "r9",
103      "r11",
104      "r14",
105      "r15"
106    };
107    return names[index];
108  }
109
110  static Register from_code(int code) {
111    Register r = { code };
112    return r;
113  }
114  bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
115  bool is(Register reg) const { return code_ == reg.code_; }
116  // rax, rbx, rcx and rdx are byte registers, the rest are not.
117  bool is_byte_register() const { return code_ <= 3; }
118  int code() const {
119    DCHECK(is_valid());
120    return code_;
121  }
122  int bit() const {
123    return 1 << code_;
124  }
125
126  // Return the high bit of the register code as a 0 or 1.  Used often
127  // when constructing the REX prefix byte.
128  int high_bit() const {
129    return code_ >> 3;
130  }
131  // Return the 3 low bits of the register code.  Used when encoding registers
132  // in modR/M, SIB, and opcode bytes.
133  int low_bits() const {
134    return code_ & 0x7;
135  }
136
137  // Unfortunately we can't make this private in a struct when initializing
138  // by assignment.
139  int code_;
140
141 private:
142  static const int kRegisterCodeByAllocationIndex[kMaxNumAllocatableRegisters];
143  static const int kAllocationIndexByRegisterCode[kNumRegisters];
144};
145
// Numeric codes of the general-purpose registers. These are the values that
// Register::high_bit() and Register::low_bits() split into the REX bit and
// the 3-bit field used in ModR/M, SIB and opcode bytes.
const int kRegister_rax_Code = 0;
const int kRegister_rcx_Code = 1;
const int kRegister_rdx_Code = 2;
const int kRegister_rbx_Code = 3;
const int kRegister_rsp_Code = 4;
const int kRegister_rbp_Code = 5;
const int kRegister_rsi_Code = 6;
const int kRegister_rdi_Code = 7;
const int kRegister_r8_Code = 8;
const int kRegister_r9_Code = 9;
const int kRegister_r10_Code = 10;
const int kRegister_r11_Code = 11;
const int kRegister_r12_Code = 12;
const int kRegister_r13_Code = 13;
const int kRegister_r14_Code = 14;
const int kRegister_r15_Code = 15;
// Code of the no_reg sentinel. It is negative, so Register::is_valid()
// returns false for it.
const int kRegister_no_reg_Code = -1;
163
// One Register constant per register code, plus the no_reg sentinel. Each is
// aggregate-initialized from the matching kRegister_*_Code constant.
const Register rax = { kRegister_rax_Code };
const Register rcx = { kRegister_rcx_Code };
const Register rdx = { kRegister_rdx_Code };
const Register rbx = { kRegister_rbx_Code };
const Register rsp = { kRegister_rsp_Code };
const Register rbp = { kRegister_rbp_Code };
const Register rsi = { kRegister_rsi_Code };
const Register rdi = { kRegister_rdi_Code };
const Register r8 = { kRegister_r8_Code };
const Register r9 = { kRegister_r9_Code };
const Register r10 = { kRegister_r10_Code };
const Register r11 = { kRegister_r11_Code };
const Register r12 = { kRegister_r12_Code };
const Register r13 = { kRegister_r13_Code };
const Register r14 = { kRegister_r14_Code };
const Register r15 = { kRegister_r15_Code };
// Sentinel meaning "no register"; is_valid() is false for it.
const Register no_reg = { kRegister_no_reg_Code };
181
// arg_reg_N names the register used for the N-th argument under the target
// platform's calling convention. The mapping is chosen at compile time by
// _WIN64: rcx, rdx, r8, r9 on Windows and rdi, rsi, rdx, rcx otherwise.
#ifdef _WIN64
  // Windows calling convention
  const Register arg_reg_1 = { kRegister_rcx_Code };
  const Register arg_reg_2 = { kRegister_rdx_Code };
  const Register arg_reg_3 = { kRegister_r8_Code };
  const Register arg_reg_4 = { kRegister_r9_Code };
#else
  // AMD64 calling convention
  const Register arg_reg_1 = { kRegister_rdi_Code };
  const Register arg_reg_2 = { kRegister_rsi_Code };
  const Register arg_reg_3 = { kRegister_rdx_Code };
  const Register arg_reg_4 = { kRegister_rcx_Code };
#endif  // _WIN64
195
196struct XMMRegister {
197  static const int kMaxNumRegisters = 16;
198  static const int kMaxNumAllocatableRegisters = 15;
199  static int NumAllocatableRegisters() {
200    return kMaxNumAllocatableRegisters;
201  }
202
203  static int ToAllocationIndex(XMMRegister reg) {
204    DCHECK(reg.code() != 0);
205    return reg.code() - 1;
206  }
207
208  static XMMRegister FromAllocationIndex(int index) {
209    DCHECK(0 <= index && index < kMaxNumAllocatableRegisters);
210    XMMRegister result = { index + 1 };
211    return result;
212  }
213
214  static const char* AllocationIndexToString(int index) {
215    DCHECK(index >= 0 && index < kMaxNumAllocatableRegisters);
216    const char* const names[] = {
217      "xmm1",
218      "xmm2",
219      "xmm3",
220      "xmm4",
221      "xmm5",
222      "xmm6",
223      "xmm7",
224      "xmm8",
225      "xmm9",
226      "xmm10",
227      "xmm11",
228      "xmm12",
229      "xmm13",
230      "xmm14",
231      "xmm15"
232    };
233    return names[index];
234  }
235
236  static XMMRegister from_code(int code) {
237    DCHECK(code >= 0);
238    DCHECK(code < kMaxNumRegisters);
239    XMMRegister r = { code };
240    return r;
241  }
242  bool is_valid() const { return 0 <= code_ && code_ < kMaxNumRegisters; }
243  bool is(XMMRegister reg) const { return code_ == reg.code_; }
244  int code() const {
245    DCHECK(is_valid());
246    return code_;
247  }
248
249  // Return the high bit of the register code as a 0 or 1.  Used often
250  // when constructing the REX prefix byte.
251  int high_bit() const {
252    return code_ >> 3;
253  }
254  // Return the 3 low bits of the register code.  Used when encoding registers
255  // in modR/M, SIB, and opcode bytes.
256  int low_bits() const {
257    return code_ & 0x7;
258  }
259
260  int code_;
261};
262
// The sixteen XMM register constants; each initializer is the register code.
// xmm0 (code 0) is the one register that XMMRegister::ToAllocationIndex
// rejects.
const XMMRegister xmm0 = { 0 };
const XMMRegister xmm1 = { 1 };
const XMMRegister xmm2 = { 2 };
const XMMRegister xmm3 = { 3 };
const XMMRegister xmm4 = { 4 };
const XMMRegister xmm5 = { 5 };
const XMMRegister xmm6 = { 6 };
const XMMRegister xmm7 = { 7 };
const XMMRegister xmm8 = { 8 };
const XMMRegister xmm9 = { 9 };
const XMMRegister xmm10 = { 10 };
const XMMRegister xmm11 = { 11 };
const XMMRegister xmm12 = { 12 };
const XMMRegister xmm13 = { 13 };
const XMMRegister xmm14 = { 14 };
const XMMRegister xmm15 = { 15 };


// DoubleRegister is simply another name for XMMRegister on this platform.
typedef XMMRegister DoubleRegister;
282
283
// Condition codes. Values 0..15 are combined directly with an opcode prefix
// (see e.g. kJccShortPrefix | cond in Assembler). Each even/odd pair of
// values holds a condition and its inverse, which is what NegateCondition
// relies on when it flips the lowest bit.
enum Condition {
  // any value < 0 is considered no_condition
  no_condition  = -1,

  overflow      =  0,
  no_overflow   =  1,
  below         =  2,
  above_equal   =  3,
  equal         =  4,
  not_equal     =  5,
  below_equal   =  6,
  above         =  7,
  negative      =  8,
  positive      =  9,
  parity_even   = 10,
  parity_odd    = 11,
  less          = 12,
  greater_equal = 13,
  less_equal    = 14,
  greater       = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always        = 16,
  never         = 17,
  // aliases
  carry         = below,
  not_carry     = above_equal,
  zero          = equal,
  not_zero      = not_equal,
  sign          = negative,
  not_sign      = positive,
  // Largest real condition value (15); always and never lie beyond it.
  last_condition = greater
};
318
319
320// Returns the equivalent of !cc.
321// Negation of the default no_condition (-1) results in a non-default
322// no_condition value (-2). As long as tests for no_condition check
323// for condition < 0, this will work as expected.
324inline Condition NegateCondition(Condition cc) {
325  return static_cast<Condition>(cc ^ 1);
326}
327
328
329// Commute a condition such that {a cond b == b cond' a}.
330inline Condition CommuteCondition(Condition cc) {
331  switch (cc) {
332    case below:
333      return above;
334    case above:
335      return below;
336    case above_equal:
337      return below_equal;
338    case below_equal:
339      return above_equal;
340    case less:
341      return greater;
342    case greater:
343      return less;
344    case greater_equal:
345      return less_equal;
346    case less_equal:
347      return greater_equal;
348    default:
349      return cc;
350  }
351}
352
353
354// -----------------------------------------------------------------------------
355// Machine instruction Immediates
356
357class Immediate BASE_EMBEDDED {
358 public:
359  explicit Immediate(int32_t value) : value_(value) {}
360  explicit Immediate(Smi* value) {
361    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
362    value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value));
363  }
364
365 private:
366  int32_t value_;
367
368  friend class Assembler;
369};
370
371
372// -----------------------------------------------------------------------------
373// Machine instruction Operands
374
// Multiplier applied to the index register of a memory operand. Each
// enumerator's value is the base-2 logarithm of its multiplier
// (times_1 = 0 ... times_8 = 3).
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,
  // Follows kPointerSize: times_8 for 8-byte pointers, times_4 otherwise.
  times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
};
383
384
// A memory operand of an instruction. It holds the pre-encoded addressing
// bytes (ModR/M, optional SIB, displacement) together with the REX bits the
// operand needs. The 'reg' field of the ModR/M byte is left open and is
// filled in when the operand is emitted.
class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

 private:
  // REX bits required by this operand; zero means no REX prefix is needed
  // on its account.
  byte rex_;
  // Encoded operand bytes.
  // NOTE(review): 6 presumably covers ModR/M (1) + SIB (1) + a 32-bit
  // displacement (4) - confirm against the set_* implementations.
  byte buf_[6];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);

  friend class Assembler;
};
438
439
// X-macro listing the instructions for which Assembler generates the
// size-suffixed wrappers (<name>p, <name>l, <name>q). V is invoked once per
// instruction name; each wrapper forwards to a matching emit_<name>
// function.
#define ASSEMBLER_INSTRUCTION_LIST(V) \
  V(add)                              \
  V(and)                              \
  V(cmp)                              \
  V(dec)                              \
  V(idiv)                             \
  V(div)                              \
  V(imul)                             \
  V(inc)                              \
  V(lea)                              \
  V(mov)                              \
  V(movzxb)                           \
  V(movzxw)                           \
  V(neg)                              \
  V(not)                              \
  V(or)                               \
  V(repmovs)                          \
  V(sbb)                              \
  V(sub)                              \
  V(test)                             \
  V(xchg)                             \
  V(xor)
462
463
// Shift instructions on operands/registers with kPointerSize, kInt32Size and
// kInt64Size.
// V is invoked as V(name, subcode); Assembler passes the subcode on to its
// shift() helper. Subcode 0x6 is deliberately absent from the list.
// NOTE(review): the subcode is presumably the opcode extension placed in the
// ModR/M reg field - confirm against Assembler::shift.
// The last entry ends with a line continuation, so the line following the
// macro must stay blank.
#define SHIFT_INSTRUCTION_LIST(V)       \
  V(rol, 0x0)                           \
  V(ror, 0x1)                           \
  V(rcl, 0x2)                           \
  V(rcr, 0x3)                           \
  V(shl, 0x4)                           \
  V(shr, 0x5)                           \
  V(sar, 0x7)                           \
474
475
476class Assembler : public AssemblerBase {
477 private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  // The worst case is therefore 15 + 16 = 31 bytes, which fits within the
  // 32 bytes reserved here.
  static const int kGap = 32;
488
489 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is NULL, the assembler allocates and grows its own
  // buffer, and buffer_size determines the initial buffer size. The buffer is
  // owned by the assembler and deallocated upon destruction of the assembler.
  //
  // If the provided buffer is not NULL, the assembler uses the provided buffer
  // for code generation and assumes its size to be buffer_size. If the buffer
  // is too small, a fatal error occurs. No deallocation of the buffer is done
  // upon destruction of the assembler.
  Assembler(Isolate* isolate, void* buffer, int buffer_size);
  // The destructor body is empty.
  // NOTE(review): releasing an assembler-owned buffer presumably happens in
  // AssemblerBase - confirm there.
  virtual ~Assembler() { }

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc. GetCode() is idempotent; it returns the same result if no other
  // Assembler functions are invoked in between GetCode() calls.
  void GetCode(CodeDesc* desc);
510
  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
  //
  // These two overloads take the constant pool directly; the Code* overloads
  // that follow derive the pool from the code object and forward here.
  // icache_flush_mode defaults to FLUSH_ICACHE_IF_NEEDED.
  static inline Address target_address_at(Address pc,
                                          ConstantPoolArray* constant_pool);
  static inline void set_target_address_at(Address pc,
                                           ConstantPoolArray* constant_pool,
                                           Address target,
                                           ICacheFlushMode icache_flush_mode =
                                               FLUSH_ICACHE_IF_NEEDED) ;
525  static inline Address target_address_at(Address pc, Code* code) {
526    ConstantPoolArray* constant_pool = code ? code->constant_pool() : NULL;
527    return target_address_at(pc, constant_pool);
528  }
529  static inline void set_target_address_at(Address pc,
530                                           Code* code,
531                                           Address target,
532                                           ICacheFlushMode icache_flush_mode =
533                                               FLUSH_ICACHE_IF_NEEDED) {
534    ConstantPoolArray* constant_pool = code ? code->constant_pool() : NULL;
535    set_target_address_at(pc, constant_pool, target, icache_flush_mode);
536  }
537
  // Return the code target address at a call site from the return address
  // of that call in the instruction stream.
  // pc is the return address, not the address of the call instruction.
  static inline Address target_address_from_return_address(Address pc);

  // Return the code target address of the patch debug break slot
  // pc is likewise a return address.
  inline static Address break_address_from_return_address(Address pc);
544
  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  // Forwards to set_target_address_at with its default icache flush mode.
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Code* code, Address target) {
    set_target_address_at(instruction_payload, code, target);
  }
551
552  static inline RelocInfo::Mode RelocInfoNone() {
553    if (kPointerSize == kInt64Size) {
554      return RelocInfo::NONE64;
555    } else {
556      DCHECK(kPointerSize == kInt32Size);
557      return RelocInfo::NONE32;
558    }
559  }
560
  inline Handle<Object> code_target_object_handle_at(Address pc);
  inline Address runtime_entry_at(Address pc);
  // Number of bytes taken up by the branch target in the code.
  static const int kSpecialTargetSize = 4;  // Use 32-bit displacement.
  // Distance between the address of the code target in the call instruction
  // and the return address pushed on the stack.
  static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
  // The length of call(kScratchRegister).
  static const int kCallScratchRegisterInstructionLength = 3;
  // The length of call(Immediate32).
  static const int kShortCallInstructionLength = 5;
  // The length of movq(kScratchRegister, address).
  // NOTE(review): the 2 presumably accounts for the prefix and opcode bytes
  // that precede the kPointerSize-byte address - confirm.
  static const int kMoveAddressIntoScratchRegisterInstructionLength =
      2 + kPointerSize;
  // The length of movq(kScratchRegister, address) and call(kScratchRegister).
  static const int kCallSequenceLength =
      kMoveAddressIntoScratchRegisterInstructionLength +
      kCallScratchRegisterInstructionLength;

  // The js return and debug break slot must be able to contain an indirect
  // call sequence, some x64 JS code is padded with int3 to make it large
  // enough to hold an instruction when the debugger patches it.
  static const int kJSReturnSequenceLength = kCallSequenceLength;
  static const int kDebugBreakSlotLength = kCallSequenceLength;
  static const int kPatchDebugBreakSlotReturnOffset = kCallTargetAddressOffset;
  // Distance between the start of the JS return sequence and where the
  // 32-bit displacement of a short call would be. The short call is from
  // SetDebugBreakAtIC from debug-x64.cc.
  static const int kPatchReturnSequenceAddressOffset =
      kJSReturnSequenceLength - kPatchDebugBreakSlotReturnOffset;
  // Distance between the start of the debug break slot and where the
  // 32-bit displacement of a short call would be. The short call is from
  // SetDebugBreakAtIC from debug-x64.cc.
  static const int kPatchDebugBreakSlotAddressOffset =
      kDebugBreakSlotLength - kPatchDebugBreakSlotReturnOffset;
  // Length of movq(kScratchRegister, address) minus the kPointerSize-byte
  // address, i.e. 2.
  // NOTE(review): presumably the offset of the address immediate from the
  // start of the patched sequence - confirm against the debugger code.
  static const int kRealPatchReturnSequenceAddressOffset =
      kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;
  // One byte opcode for test eax,0xXXXXXXXX.
  static const byte kTestEaxByte = 0xA9;
  // One byte opcode for test al, 0xXX.
  static const byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static const byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  // The opcode of a specific short jump is this prefix OR-ed with the
  // Condition value, as the four constants below show.
  static const byte kJccShortPrefix = 0x70;
  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static const byte kJcShortOpcode = kJccShortPrefix | carry;
  static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static const byte kJzShortOpcode = kJccShortPrefix | zero;
612
613
614  // ---------------------------------------------------------------------------
615  // Code generation
616  //
617  // Function names correspond one-to-one to x64 instruction mnemonics.
618  // Unless specified otherwise, instructions operate on 64-bit operands.
619  //
620  // If we need versions of an assembly instruction that operate on different
621  // width arguments, we add a single-letter suffix specifying the width.
622  // This is done for the following instructions: mov, cmp, inc, dec,
623  // add, sub, and test.
624  // There are no versions of these instructions without the suffix.
625  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
626  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
627  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
628  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
629  // - Instructions on operands/registers with pointer size use 'p'.
630
  // The pointer-sized ('p') variants are only defined for 4- or 8-byte
  // pointers.
  STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);

  // For every instruction in ASSEMBLER_INSTRUCTION_LIST this generates the
  // member templates <name>p, <name>l and <name>q, each in a one-, two- and
  // three-operand form. Every wrapper forwards its operands to
  // emit_<name>(..., size), where size is kPointerSize, kInt32Size or
  // kInt64Size respectively. Because the wrappers are templates, a
  // particular operand combination only has to have a matching emit_<name>
  // overload if it is actually used.
#define DECLARE_INSTRUCTION(instruction)                \
  template<class P1>                                    \
  void instruction##p(P1 p1) {                          \
    emit_##instruction(p1, kPointerSize);               \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##l(P1 p1) {                          \
    emit_##instruction(p1, kInt32Size);                 \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##q(P1 p1) {                          \
    emit_##instruction(p1, kInt64Size);                 \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##p(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kPointerSize);           \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##l(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt32Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##q(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt64Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##p(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kPointerSize);       \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##l(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt32Size);         \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##q(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt64Size);         \
  }
  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION
680
  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m, where m must be a power of 2.
  void Align(int m);
  // NOTE(review): presumably emits `bytes` bytes of no-op padding (one byte
  // by default) - confirm in the implementation.
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal for a jump target for the platform.
  void CodeTargetAlign();
688
  // Stack
  // pushfq/popfq take no operands; per the naming convention of this class
  // they correspond to the instructions of the same name.
  void pushfq();
  void popfq();

  // Push an immediate, a register or a memory operand.
  void pushq(Immediate value);
  // Push a 32 bit integer, and guarantee that it is actually pushed as a
  // 32 bit value, the normal push will optimize the 8 bit case.
  void pushq_imm32(int32_t imm32);
  void pushq(Register src);
  void pushq(const Operand& src);

  // Pop into a register or a memory operand.
  void popq(Register dst);
  void popq(const Operand& dst);

  // enter/leave: named after the instructions of the same name.
  void enter(Immediate size);
  void leave();
705
  // Moves
  // 8-bit moves: load from memory, load an immediate, store a register and
  // store an immediate.
  void movb(Register dst, const Operand& src);
  void movb(Register dst, Immediate imm);
  void movb(const Operand& dst, Register src);
  void movb(const Operand& dst, Immediate imm);

  // 16-bit moves. The register-to-memory form moves the low 16 bits of a
  // 64-bit register value to a 16-bit memory location.
  void movw(Register dst, const Operand& src);
  void movw(const Operand& dst, Register src);
  void movw(const Operand& dst, Immediate imm);

  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  void movl(const Operand& dst, Label* src);

  // Loads a pointer into a register with a relocation mode.
  void movp(Register dst, void* ptr, RelocInfo::Mode rmode);

  // Loads a 64-bit immediate into a register.
  void movq(Register dst, int64_t value);
  void movq(Register dst, uint64_t value);

  // Sign-extending moves. Following the width letters used throughout this
  // class (b = 8, w = 16, l = 32, q = 64 bits), the two trailing letters
  // give the source width followed by the destination width.
  void movsxbl(Register dst, const Operand& src);
  void movsxbq(Register dst, const Operand& src);
  void movsxwl(Register dst, const Operand& src);
  void movsxwq(Register dst, const Operand& src);
  void movsxlq(Register dst, Register src);
  void movsxlq(Register dst, const Operand& src);
735
  // Repeated moves.

  // The byte and word variants are implemented out of line; the pointer-,
  // 32-bit- and 64-bit-sized variants forward to emit_repmovs with the
  // corresponding size constant.
  void repmovsb();
  void repmovsw();
  void repmovsp() { emit_repmovs(kPointerSize); }
  void repmovsl() { emit_repmovs(kInt32Size); }
  void repmovsq() { emit_repmovs(kInt64Size); }
743
  // Instruction to load from an immediate 64-bit pointer into RAX.
  void load_rax(void* ptr, RelocInfo::Mode rmode);
  void load_rax(ExternalReference ext);

  // Conditional moves.
  // cc selects the condition; the q forms operate on 64-bit and the l forms
  // on 32-bit operands, per the suffix convention of this class.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, const Operand& src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, const Operand& src);
753
  // 8-bit compares. The immediate forms pass subcode 0x7 to
  // immediate_arithmetic_op_8. The forms with a register destination pass
  // opcode 0x3A; the form with a memory destination passes opcode 0x38 and
  // swaps its arguments, so the register is always the first operand handed
  // to arithmetic_op_8.
  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  // Compare against al; implemented out of line.
  void cmpb_al(Immediate src);

  void cmpb(Register dst, Register src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  void cmpb(Register dst, const Operand& src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  void cmpb(const Operand& dst, Register src) {
    arithmetic_op_8(0x38, src, dst);
  }

  void cmpb(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }
775
  // 16-bit compares, structured like the 8-bit ones: the immediate forms
  // pass subcode 0x7 to immediate_arithmetic_op_16, the forms with a
  // register destination pass opcode 0x3B, and the form with a memory
  // destination passes opcode 0x39 with its arguments swapped so that the
  // register comes first.
  void cmpw(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, const Operand& src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(Register dst, Register src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(const Operand& dst, Register src) {
    arithmetic_op_16(0x39, src, dst);
  }
795
  // 8-bit and with an immediate: subcode 0x4 of immediate_arithmetic_op_8.
  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);
  }

  // 8-bit decrement of a register or a memory operand.
  void decb(Register dst);
  void decb(const Operand& dst);

  // Sign-extends rax into rdx:rax.
  void cqo();
  // Sign-extends eax into edx:eax.
  void cdq();

  // Multiply rax by src, put the result in rdx:rax.
  void mul(Register src);
810
  // For every (name, subcode) pair in SHIFT_INSTRUCTION_LIST this generates
  // six members: <name>p, <name>l and <name>q shift dst by an immediate
  // count, and <name>p_cl, <name>l_cl and <name>q_cl take no count argument
  // (the _cl suffix indicates the count comes from the cl register). All of
  // them forward to shift() with the subcode and an operand size of
  // kPointerSize, kInt32Size or kInt64Size.
#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode)     \
  void instruction##p(Register dst, Immediate imm8) {       \
    shift(dst, imm8, subcode, kPointerSize);                \
  }                                                         \
                                                            \
  void instruction##l(Register dst, Immediate imm8) {       \
    shift(dst, imm8, subcode, kInt32Size);                  \
  }                                                         \
                                                            \
  void instruction##q(Register dst, Immediate imm8) {       \
    shift(dst, imm8, subcode, kInt64Size);                  \
  }                                                         \
                                                            \
  void instruction##p_cl(Register dst) {                    \
    shift(dst, subcode, kPointerSize);                      \
  }                                                         \
                                                            \
  void instruction##l_cl(Register dst) {                    \
    shift(dst, subcode, kInt32Size);                        \
  }                                                         \
                                                            \
  void instruction##q_cl(Register dst) {                    \
    shift(dst, subcode, kInt64Size);                        \
  }
  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
#undef DECLARE_SHIFT_INSTRUCTION
837
838  // Shifts dst:src left by cl bits, affecting only dst.
839  void shld(Register dst, Register src);
840
841  // Shifts src:dst right by cl bits, affecting only dst.
842  void shrd(Register dst, Register src);
843
844  void store_rax(void* dst, RelocInfo::Mode mode);
845  void store_rax(ExternalReference ref);
846
847  void subb(Register dst, Immediate src) {
848    immediate_arithmetic_op_8(0x5, dst, src);
849  }
850
851  void testb(Register dst, Register src);
852  void testb(Register reg, Immediate mask);
853  void testb(const Operand& op, Immediate mask);
854  void testb(const Operand& op, Register reg);
855
856  // Bit operations.
857  void bt(const Operand& dst, Register src);
858  void bts(const Operand& dst, Register src);
859  void bsrl(Register dst, Register src);
860
861  // Miscellaneous
862  void clc();
863  void cld();
864  void cpuid();
865  void hlt();
866  void int3();
867  void nop();
868  void ret(int imm16);
869  void setcc(Condition cc, Register reg);
870
871  // Label operations & relative jumps (PPUM Appendix D)
872  //
873  // Takes a branch opcode (cc) and a label (L) and generates
874  // either a backward branch or a forward branch and links it
875  // to the label fixup chain. Usage:
876  //
877  // Label L;    // unbound label
878  // j(cc, &L);  // forward branch to unbound label
879  // bind(&L);   // bind label to the current pc
880  // j(cc, &L);  // backward branch to bound label
881  // bind(&L);   // illegal: a label may be bound only once
882  //
883  // Note: The same Label can be used for forward and backward branches
884  // but it may be bound only once.
885
886  void bind(Label* L);  // binds an unbound label L to the current code position
887
888  // Calls
889  // Call near relative 32-bit displacement, relative to next instruction.
890  void call(Label* L);
891  void call(Address entry, RelocInfo::Mode rmode);
892  void call(Handle<Code> target,
893            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
894            TypeFeedbackId ast_id = TypeFeedbackId::None());
895
896  // Calls directly to the given address using a relative offset.
897  // Should only ever be used in Code objects for calls within the
898  // same Code object. Should not be used when generating new code (use labels),
899  // but only when patching existing code.
900  void call(Address target);
901
902  // Call near absolute indirect, address in register
903  void call(Register adr);
904
905  // Jumps
906  // Jump short or near relative.
907  // Use a 32-bit signed displacement.
908  // Unconditional jump to L
909  void jmp(Label* L, Label::Distance distance = Label::kFar);
910  void jmp(Address entry, RelocInfo::Mode rmode);
911  void jmp(Handle<Code> target, RelocInfo::Mode rmode);
912
913  // Jump near absolute indirect (r64)
914  void jmp(Register adr);
915
916  // Conditional jumps
917  void j(Condition cc,
918         Label* L,
919         Label::Distance distance = Label::kFar);
920  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
921  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
922
923  // Floating-point operations
924  void fld(int i);
925
926  void fld1();
927  void fldz();
928  void fldpi();
929  void fldln2();
930
931  void fld_s(const Operand& adr);
932  void fld_d(const Operand& adr);
933
934  void fstp_s(const Operand& adr);
935  void fstp_d(const Operand& adr);
936  void fstp(int index);
937
938  void fild_s(const Operand& adr);
939  void fild_d(const Operand& adr);
940
941  void fist_s(const Operand& adr);
942
943  void fistp_s(const Operand& adr);
944  void fistp_d(const Operand& adr);
945
946  void fisttp_s(const Operand& adr);
947  void fisttp_d(const Operand& adr);
948
949  void fabs();
950  void fchs();
951
952  void fadd(int i);
953  void fsub(int i);
954  void fmul(int i);
955  void fdiv(int i);
956
957  void fisub_s(const Operand& adr);
958
959  void faddp(int i = 1);
960  void fsubp(int i = 1);
961  void fsubrp(int i = 1);
962  void fmulp(int i = 1);
963  void fdivp(int i = 1);
964  void fprem();
965  void fprem1();
966
967  void fxch(int i = 1);
968  void fincstp();
969  void ffree(int i = 0);
970
971  void ftst();
972  void fucomp(int i);
973  void fucompp();
974  void fucomi(int i);
975  void fucomip();
976
977  void fcompp();
978  void fnstsw_ax();
979  void fwait();
980  void fnclex();
981
982  void fsin();
983  void fcos();
984  void fptan();
985  void fyl2x();
986  void f2xm1();
987  void fscale();
988  void fninit();
989
990  void frndint();
991
992  void sahf();
993
994  // SSE instructions
995  void movaps(XMMRegister dst, XMMRegister src);
996  void movss(XMMRegister dst, const Operand& src);
997  void movss(const Operand& dst, XMMRegister src);
998  void shufps(XMMRegister dst, XMMRegister src, byte imm8);
999
1000  void cvttss2si(Register dst, const Operand& src);
1001  void cvttss2si(Register dst, XMMRegister src);
1002  void cvtlsi2ss(XMMRegister dst, Register src);
1003
1004  void andps(XMMRegister dst, XMMRegister src);
1005  void andps(XMMRegister dst, const Operand& src);
1006  void orps(XMMRegister dst, XMMRegister src);
1007  void orps(XMMRegister dst, const Operand& src);
1008  void xorps(XMMRegister dst, XMMRegister src);
1009  void xorps(XMMRegister dst, const Operand& src);
1010
1011  void addps(XMMRegister dst, XMMRegister src);
1012  void addps(XMMRegister dst, const Operand& src);
1013  void subps(XMMRegister dst, XMMRegister src);
1014  void subps(XMMRegister dst, const Operand& src);
1015  void mulps(XMMRegister dst, XMMRegister src);
1016  void mulps(XMMRegister dst, const Operand& src);
1017  void divps(XMMRegister dst, XMMRegister src);
1018  void divps(XMMRegister dst, const Operand& src);
1019
1020  void movmskps(Register dst, XMMRegister src);
1021
1022  // SSE2 instructions
1023  void movd(XMMRegister dst, Register src);
1024  void movd(Register dst, XMMRegister src);
1025  void movq(XMMRegister dst, Register src);
1026  void movq(Register dst, XMMRegister src);
1027  void movq(XMMRegister dst, XMMRegister src);
1028
1029  // Don't use this unless it's important to keep the
1030  // top half of the destination register unchanged.
  // Use movaps when moving double values and movq for integer
1032  // values in xmm registers.
1033  void movsd(XMMRegister dst, XMMRegister src);
1034
1035  void movsd(const Operand& dst, XMMRegister src);
1036  void movsd(XMMRegister dst, const Operand& src);
1037
1038  void movdqa(const Operand& dst, XMMRegister src);
1039  void movdqa(XMMRegister dst, const Operand& src);
1040
1041  void movdqu(const Operand& dst, XMMRegister src);
1042  void movdqu(XMMRegister dst, const Operand& src);
1043
1044  void movapd(XMMRegister dst, XMMRegister src);
1045
1046  void psllq(XMMRegister reg, byte imm8);
1047
1048  void cvttsd2si(Register dst, const Operand& src);
1049  void cvttsd2si(Register dst, XMMRegister src);
1050  void cvttsd2siq(Register dst, XMMRegister src);
1051  void cvttsd2siq(Register dst, const Operand& src);
1052
1053  void cvtlsi2sd(XMMRegister dst, const Operand& src);
1054  void cvtlsi2sd(XMMRegister dst, Register src);
1055  void cvtqsi2sd(XMMRegister dst, const Operand& src);
1056  void cvtqsi2sd(XMMRegister dst, Register src);
1057
1058
1059  void cvtss2sd(XMMRegister dst, XMMRegister src);
1060  void cvtss2sd(XMMRegister dst, const Operand& src);
1061  void cvtsd2ss(XMMRegister dst, XMMRegister src);
1062
1063  void cvtsd2si(Register dst, XMMRegister src);
1064  void cvtsd2siq(Register dst, XMMRegister src);
1065
1066  void addsd(XMMRegister dst, XMMRegister src);
1067  void addsd(XMMRegister dst, const Operand& src);
1068  void subsd(XMMRegister dst, XMMRegister src);
1069  void mulsd(XMMRegister dst, XMMRegister src);
1070  void mulsd(XMMRegister dst, const Operand& src);
1071  void divsd(XMMRegister dst, XMMRegister src);
1072
1073  void andpd(XMMRegister dst, XMMRegister src);
1074  void orpd(XMMRegister dst, XMMRegister src);
1075  void xorpd(XMMRegister dst, XMMRegister src);
1076  void sqrtsd(XMMRegister dst, XMMRegister src);
1077  void sqrtsd(XMMRegister dst, const Operand& src);
1078
1079  void ucomisd(XMMRegister dst, XMMRegister src);
1080  void ucomisd(XMMRegister dst, const Operand& src);
1081  void cmpltsd(XMMRegister dst, XMMRegister src);
1082
1083  void movmskpd(Register dst, XMMRegister src);
1084
1085  // SSE 4.1 instruction
1086  void extractps(Register dst, XMMRegister src, byte imm8);
1087
1088  enum RoundingMode {
1089    kRoundToNearest = 0x0,
1090    kRoundDown      = 0x1,
1091    kRoundUp        = 0x2,
1092    kRoundToZero    = 0x3
1093  };
1094
1095  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1096
1097  // Debugging
1098  void Print();
1099
1100  // Check the code size generated from label to here.
1101  int SizeOfCodeGeneratedSince(Label* label) {
1102    return pc_offset() - label->pos();
1103  }
1104
1105  // Mark address of the ExitJSFrame code.
1106  void RecordJSReturn();
1107
1108  // Mark address of a debug break slot.
1109  void RecordDebugBreakSlot();
1110
1111  // Record a comment relocation entry that can be used by a disassembler.
1112  // Use --code-comments to enable.
1113  void RecordComment(const char* msg, bool force = false);
1114
1115  // Allocate a constant pool of the correct size for the generated code.
1116  Handle<ConstantPoolArray> NewConstantPool(Isolate* isolate);
1117
1118  // Generate the constant pool for the generated code.
1119  void PopulateConstantPool(ConstantPoolArray* constant_pool);
1120
  // Writes a single byte (db) or 32-bit word (dd) of data in the code stream.
1122  // Used for inline tables, e.g., jump-tables.
1123  void db(uint8_t data);
1124  void dd(uint32_t data);
1125
1126  PositionsRecorder* positions_recorder() { return &positions_recorder_; }
1127
  // Check if there are fewer than kGap bytes available in the buffer.
1129  // If this is the case, we need to grow the buffer before emitting
1130  // an instruction or relocation information.
1131  inline bool buffer_overflow() const {
1132    return pc_ >= reloc_info_writer.pos() - kGap;
1133  }
1134
1135  // Get the number of bytes available in the buffer.
1136  inline int available_space() const {
1137    return static_cast<int>(reloc_info_writer.pos() - pc_);
1138  }
1139
1140  static bool IsNop(Address addr);
1141
1142  // Avoid overflows for displacements etc.
1143  static const int kMaximalBufferSize = 512*MB;
1144
1145  byte byte_at(int pos)  { return buffer_[pos]; }
1146  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
1147
1148 protected:
1149  // Call near indirect
1150  void call(const Operand& operand);
1151
1152  // Jump near absolute indirect (m64)
1153  void jmp(const Operand& src);
1154
1155 private:
1156  byte* addr_at(int pos)  { return buffer_ + pos; }
1157  uint32_t long_at(int pos)  {
1158    return *reinterpret_cast<uint32_t*>(addr_at(pos));
1159  }
1160  void long_at_put(int pos, uint32_t x)  {
1161    *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
1162  }
1163
1164  // code emission
1165  void GrowBuffer();
1166
1167  void emit(byte x) { *pc_++ = x; }
1168  inline void emitl(uint32_t x);
1169  inline void emitp(void* x, RelocInfo::Mode rmode);
1170  inline void emitq(uint64_t x);
1171  inline void emitw(uint16_t x);
1172  inline void emit_code_target(Handle<Code> target,
1173                               RelocInfo::Mode rmode,
1174                               TypeFeedbackId ast_id = TypeFeedbackId::None());
1175  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
1176  void emit(Immediate x) { emitl(x.value_); }
1177
1178  // Emits a REX prefix that encodes a 64-bit operand size and
1179  // the top bit of both register codes.
1180  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1181  // REX.W is set.
1182  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
1183  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
1184  inline void emit_rex_64(Register reg, Register rm_reg);
1185
1186  // Emits a REX prefix that encodes a 64-bit operand size and
1187  // the top bit of the destination, index, and base register codes.
1188  // The high bit of reg is used for REX.R, the high bit of op's base
1189  // register is used for REX.B, and the high bit of op's index register
1190  // is used for REX.X.  REX.W is set.
1191  inline void emit_rex_64(Register reg, const Operand& op);
1192  inline void emit_rex_64(XMMRegister reg, const Operand& op);
1193
1194  // Emits a REX prefix that encodes a 64-bit operand size and
1195  // the top bit of the register code.
1196  // The high bit of register is used for REX.B.
1197  // REX.W is set and REX.R and REX.X are clear.
1198  inline void emit_rex_64(Register rm_reg);
1199
1200  // Emits a REX prefix that encodes a 64-bit operand size and
1201  // the top bit of the index and base register codes.
1202  // The high bit of op's base register is used for REX.B, and the high
1203  // bit of op's index register is used for REX.X.
1204  // REX.W is set and REX.R clear.
1205  inline void emit_rex_64(const Operand& op);
1206
1207  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
1208  void emit_rex_64() { emit(0x48); }
1209
1210  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1211  // REX.W is clear.
1212  inline void emit_rex_32(Register reg, Register rm_reg);
1213
1214  // The high bit of reg is used for REX.R, the high bit of op's base
1215  // register is used for REX.B, and the high bit of op's index register
1216  // is used for REX.X.  REX.W is cleared.
1217  inline void emit_rex_32(Register reg, const Operand& op);
1218
1219  // High bit of rm_reg goes to REX.B.
1220  // REX.W, REX.R and REX.X are clear.
1221  inline void emit_rex_32(Register rm_reg);
1222
1223  // High bit of base goes to REX.B and high bit of index to REX.X.
1224  // REX.W and REX.R are clear.
1225  inline void emit_rex_32(const Operand& op);
1226
1227  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1228  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
1229  inline void emit_optional_rex_32(Register reg, Register rm_reg);
1230
1231  // The high bit of reg is used for REX.R, the high bit of op's base
1232  // register is used for REX.B, and the high bit of op's index register
1233  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
1234  // is emitted.
1235  inline void emit_optional_rex_32(Register reg, const Operand& op);
1236
1237  // As for emit_optional_rex_32(Register, Register), except that
1238  // the registers are XMM registers.
1239  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);
1240
1241  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
1243  inline void emit_optional_rex_32(XMMRegister reg, Register base);
1244
1245  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
1247  inline void emit_optional_rex_32(Register reg, XMMRegister base);
1248
1249  // As for emit_optional_rex_32(Register, const Operand&), except that
1250  // the register is an XMM register.
1251  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);
1252
1253  // Optionally do as emit_rex_32(Register) if the register number has
1254  // the high bit set.
1255  inline void emit_optional_rex_32(Register rm_reg);
1256
1257  // Optionally do as emit_rex_32(const Operand&) if the operand register
1258  // numbers have a high bit set.
1259  inline void emit_optional_rex_32(const Operand& op);
1260
1261  void emit_rex(int size) {
1262    if (size == kInt64Size) {
1263      emit_rex_64();
1264    } else {
1265      DCHECK(size == kInt32Size);
1266    }
1267  }
1268
1269  template<class P1>
1270  void emit_rex(P1 p1, int size) {
1271    if (size == kInt64Size) {
1272      emit_rex_64(p1);
1273    } else {
1274      DCHECK(size == kInt32Size);
1275      emit_optional_rex_32(p1);
1276    }
1277  }
1278
1279  template<class P1, class P2>
1280  void emit_rex(P1 p1, P2 p2, int size) {
1281    if (size == kInt64Size) {
1282      emit_rex_64(p1, p2);
1283    } else {
1284      DCHECK(size == kInt32Size);
1285      emit_optional_rex_32(p1, p2);
1286    }
1287  }
1288
1289  // Emit the ModR/M byte, and optionally the SIB byte and
1290  // 1- or 4-byte offset for a memory operand.  Also encodes
1291  // the second operand of the operation, a register or operation
1292  // subcode, into the reg field of the ModR/M byte.
1293  void emit_operand(Register reg, const Operand& adr) {
1294    emit_operand(reg.low_bits(), adr);
1295  }
1296
1297  // Emit the ModR/M byte, and optionally the SIB byte and
1298  // 1- or 4-byte offset for a memory operand.  Also used to encode
1299  // a three-bit opcode extension into the ModR/M byte.
1300  void emit_operand(int rm, const Operand& adr);
1301
1302  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
1303  void emit_modrm(Register reg, Register rm_reg) {
1304    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
1305  }
1306
1307  // Emit a ModR/M byte with an operation subcode in the reg field and
1308  // a register in the rm_reg field.
1309  void emit_modrm(int code, Register rm_reg) {
1310    DCHECK(is_uint3(code));
1311    emit(0xC0 | code << 3 | rm_reg.low_bits());
1312  }
1313
1314  // Emit the code-object-relative offset of the label's position
1315  inline void emit_code_relative_offset(Label* label);
1316
1317  // The first argument is the reg field, the second argument is the r/m field.
1318  void emit_sse_operand(XMMRegister dst, XMMRegister src);
1319  void emit_sse_operand(XMMRegister reg, const Operand& adr);
1320  void emit_sse_operand(Register reg, const Operand& adr);
1321  void emit_sse_operand(XMMRegister dst, Register src);
1322  void emit_sse_operand(Register dst, XMMRegister src);
1323
  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
1325  // AND, OR, XOR, or CMP.  The encodings of these operations are all
1326  // similar, differing just in the opcode or in the reg field of the
1327  // ModR/M byte.
1328  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
1329  void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg);
1330  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
1331  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
1332  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
1333  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
1334  void arithmetic_op(byte opcode,
1335                     Register reg,
1336                     const Operand& rm_reg,
1337                     int size);
1338  // Operate on a byte in memory or register.
1339  void immediate_arithmetic_op_8(byte subcode,
1340                                 Register dst,
1341                                 Immediate src);
1342  void immediate_arithmetic_op_8(byte subcode,
1343                                 const Operand& dst,
1344                                 Immediate src);
1345  // Operate on a word in memory or register.
1346  void immediate_arithmetic_op_16(byte subcode,
1347                                  Register dst,
1348                                  Immediate src);
1349  void immediate_arithmetic_op_16(byte subcode,
1350                                  const Operand& dst,
1351                                  Immediate src);
1352  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
1353  void immediate_arithmetic_op(byte subcode,
1354                               Register dst,
1355                               Immediate src,
1356                               int size);
1357  void immediate_arithmetic_op(byte subcode,
1358                               const Operand& dst,
1359                               Immediate src,
1360                               int size);
1361
1362  // Emit machine code for a shift operation.
1363  void shift(Register dst, Immediate shift_amount, int subcode, int size);
1364  // Shift dst by cl % 64 bits.
1365  void shift(Register dst, int subcode, int size);
1366
1367  void emit_farith(int b1, int b2, int i);
1368
1369  // labels
1370  // void print(Label* L);
1371  void bind_to(Label* L, int pos);
1372
1373  // record reloc info for current pc_
1374  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
1375
1376  // Arithmetics
1377  void emit_add(Register dst, Register src, int size) {
1378    arithmetic_op(0x03, dst, src, size);
1379  }
1380
1381  void emit_add(Register dst, Immediate src, int size) {
1382    immediate_arithmetic_op(0x0, dst, src, size);
1383  }
1384
1385  void emit_add(Register dst, const Operand& src, int size) {
1386    arithmetic_op(0x03, dst, src, size);
1387  }
1388
1389  void emit_add(const Operand& dst, Register src, int size) {
1390    arithmetic_op(0x1, src, dst, size);
1391  }
1392
1393  void emit_add(const Operand& dst, Immediate src, int size) {
1394    immediate_arithmetic_op(0x0, dst, src, size);
1395  }
1396
1397  void emit_and(Register dst, Register src, int size) {
1398    arithmetic_op(0x23, dst, src, size);
1399  }
1400
1401  void emit_and(Register dst, const Operand& src, int size) {
1402    arithmetic_op(0x23, dst, src, size);
1403  }
1404
1405  void emit_and(const Operand& dst, Register src, int size) {
1406    arithmetic_op(0x21, src, dst, size);
1407  }
1408
1409  void emit_and(Register dst, Immediate src, int size) {
1410    immediate_arithmetic_op(0x4, dst, src, size);
1411  }
1412
1413  void emit_and(const Operand& dst, Immediate src, int size) {
1414    immediate_arithmetic_op(0x4, dst, src, size);
1415  }
1416
1417  void emit_cmp(Register dst, Register src, int size) {
1418    arithmetic_op(0x3B, dst, src, size);
1419  }
1420
1421  void emit_cmp(Register dst, const Operand& src, int size) {
1422    arithmetic_op(0x3B, dst, src, size);
1423  }
1424
1425  void emit_cmp(const Operand& dst, Register src, int size) {
1426    arithmetic_op(0x39, src, dst, size);
1427  }
1428
1429  void emit_cmp(Register dst, Immediate src, int size) {
1430    immediate_arithmetic_op(0x7, dst, src, size);
1431  }
1432
1433  void emit_cmp(const Operand& dst, Immediate src, int size) {
1434    immediate_arithmetic_op(0x7, dst, src, size);
1435  }
1436
1437  void emit_dec(Register dst, int size);
1438  void emit_dec(const Operand& dst, int size);
1439
1440  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx when size is 64.
1441  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, remainder in edx
1442  // when size is 32.
1443  void emit_idiv(Register src, int size);
1444  void emit_div(Register src, int size);
1445
1446  // Signed multiply instructions.
1447  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
1448  void emit_imul(Register src, int size);
1449  void emit_imul(Register dst, Register src, int size);
1450  void emit_imul(Register dst, const Operand& src, int size);
1451  void emit_imul(Register dst, Register src, Immediate imm, int size);
1452
1453  void emit_inc(Register dst, int size);
1454  void emit_inc(const Operand& dst, int size);
1455
1456  void emit_lea(Register dst, const Operand& src, int size);
1457
1458  void emit_mov(Register dst, const Operand& src, int size);
1459  void emit_mov(Register dst, Register src, int size);
1460  void emit_mov(const Operand& dst, Register src, int size);
1461  void emit_mov(Register dst, Immediate value, int size);
1462  void emit_mov(const Operand& dst, Immediate value, int size);
1463
1464  void emit_movzxb(Register dst, const Operand& src, int size);
1465  void emit_movzxb(Register dst, Register src, int size);
1466  void emit_movzxw(Register dst, const Operand& src, int size);
1467  void emit_movzxw(Register dst, Register src, int size);
1468
1469  void emit_neg(Register dst, int size);
1470  void emit_neg(const Operand& dst, int size);
1471
1472  void emit_not(Register dst, int size);
1473  void emit_not(const Operand& dst, int size);
1474
1475  void emit_or(Register dst, Register src, int size) {
1476    arithmetic_op(0x0B, dst, src, size);
1477  }
1478
1479  void emit_or(Register dst, const Operand& src, int size) {
1480    arithmetic_op(0x0B, dst, src, size);
1481  }
1482
1483  void emit_or(const Operand& dst, Register src, int size) {
1484    arithmetic_op(0x9, src, dst, size);
1485  }
1486
1487  void emit_or(Register dst, Immediate src, int size) {
1488    immediate_arithmetic_op(0x1, dst, src, size);
1489  }
1490
1491  void emit_or(const Operand& dst, Immediate src, int size) {
1492    immediate_arithmetic_op(0x1, dst, src, size);
1493  }
1494
1495  void emit_repmovs(int size);
1496
1497  void emit_sbb(Register dst, Register src, int size) {
1498    arithmetic_op(0x1b, dst, src, size);
1499  }
1500
1501  void emit_sub(Register dst, Register src, int size) {
1502    arithmetic_op(0x2B, dst, src, size);
1503  }
1504
1505  void emit_sub(Register dst, Immediate src, int size) {
1506    immediate_arithmetic_op(0x5, dst, src, size);
1507  }
1508
1509  void emit_sub(Register dst, const Operand& src, int size) {
1510    arithmetic_op(0x2B, dst, src, size);
1511  }
1512
1513  void emit_sub(const Operand& dst, Register src, int size) {
1514    arithmetic_op(0x29, src, dst, size);
1515  }
1516
1517  void emit_sub(const Operand& dst, Immediate src, int size) {
1518    immediate_arithmetic_op(0x5, dst, src, size);
1519  }
1520
1521  void emit_test(Register dst, Register src, int size);
1522  void emit_test(Register reg, Immediate mask, int size);
1523  void emit_test(const Operand& op, Register reg, int size);
1524  void emit_test(const Operand& op, Immediate mask, int size);
1525  void emit_test(Register reg, const Operand& op, int size) {
1526    return emit_test(op, reg, size);
1527  }
1528
1529  void emit_xchg(Register dst, Register src, int size);
1530  void emit_xchg(Register dst, const Operand& src, int size);
1531
1532  void emit_xor(Register dst, Register src, int size) {
1533    if (size == kInt64Size && dst.code() == src.code()) {
1534    // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore
1535    // there is no need to make this a 64 bit operation.
1536      arithmetic_op(0x33, dst, src, kInt32Size);
1537    } else {
1538      arithmetic_op(0x33, dst, src, size);
1539    }
1540  }
1541
1542  void emit_xor(Register dst, const Operand& src, int size) {
1543    arithmetic_op(0x33, dst, src, size);
1544  }
1545
1546  void emit_xor(Register dst, Immediate src, int size) {
1547    immediate_arithmetic_op(0x6, dst, src, size);
1548  }
1549
1550  void emit_xor(const Operand& dst, Immediate src, int size) {
1551    immediate_arithmetic_op(0x6, dst, src, size);
1552  }
1553
1554  void emit_xor(const Operand& dst, Register src, int size) {
1555    arithmetic_op(0x31, src, dst, size);
1556  }
1557
1558  friend class CodePatcher;
1559  friend class EnsureSpace;
1560  friend class RegExpMacroAssemblerX64;
1561
1562  // code generation
1563  RelocInfoWriter reloc_info_writer;
1564
1565  List< Handle<Code> > code_targets_;
1566
1567  PositionsRecorder positions_recorder_;
1568  friend class PositionsRecorder;
1569};
1570
1571
1572// Helper class that ensures that there is enough space for generating
1573// instructions and relocation information.  The constructor makes
1574// sure that there is enough space and (in debug mode) the destructor
1575// checks that we did not generate too much.
1576class EnsureSpace BASE_EMBEDDED {
1577 public:
1578  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
1579    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
1580#ifdef DEBUG
1581    space_before_ = assembler_->available_space();
1582#endif
1583  }
1584
1585#ifdef DEBUG
1586  ~EnsureSpace() {
1587    int bytes_generated = space_before_ - assembler_->available_space();
1588    DCHECK(bytes_generated < assembler_->kGap);
1589  }
1590#endif
1591
1592 private:
1593  Assembler* assembler_;
1594#ifdef DEBUG
1595  int space_before_;
1596#endif
1597};
1598
1599} }  // namespace v8::internal
1600
1601#endif  // V8_X64_ASSEMBLER_X64_H_
1602