assembler-x64.h revision e0cee9b3ed82e2391fd85d118aeaa4ea361c687d
1// Copyright (c) 1994-2006 Sun Microsystems Inc.
2// All Rights Reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// - Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10//
11// - Redistribution in binary form must reproduce the above copyright
12// notice, this list of conditions and the following disclaimer in the
13// documentation and/or other materials provided with the distribution.
14//
15// - Neither the name of Sun Microsystems or the names of contributors may
16// be used to endorse or promote products derived from this software without
17// specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// The original source code covered by the above license above has been
32// modified significantly by Google Inc.
33// Copyright 2011 the V8 project authors. All rights reserved.
34
35// A lightweight X64 Assembler.
36
37#ifndef V8_X64_ASSEMBLER_X64_H_
38#define V8_X64_ASSEMBLER_X64_H_
39
40#include "serialize.h"
41
42namespace v8 {
43namespace internal {
44
45// Utility functions
46
47// Test whether a 64-bit value is in a specific range.
// Returns true iff the signed 64-bit value x is non-negative and fits in
// 32 unsigned bits (0 <= x <= 0xffffffff).
static inline bool is_uint32(int64_t x) {
  // Negative values are never in range; for non-negative values the test
  // reduces to "no bit above bit 31 is set".
  return x >= 0 && (x >> 32) == 0;
}
52
// Returns true iff the signed 64-bit value x is representable as an int32_t,
// i.e. -0x80000000 <= x <= 0x7fffffff.
static inline bool is_int32(int64_t x) {
  // Compare against both bounds directly.  Biasing x by 2^31 and reusing the
  // unsigned range test overflows int64_t (undefined behaviour) when x is
  // within 2^31 of the maximum int64_t value.
  const int64_t kMaxInt32 = 0x7fffffff;
  const int64_t kMinInt32 = -kMaxInt32 - 1;
  return kMinInt32 <= x && x <= kMaxInt32;
}
57
// Returns true iff the unsigned 64-bit value x is representable as a
// non-negative int32_t, i.e. x <= 0x7fffffff.
static inline bool uint_is_int32(uint64_t x) {
  // In range exactly when bit 31 and every bit above it are clear.
  return (x >> 31) == 0;
}
62
// Returns true iff the unsigned 64-bit value x fits in 32 bits,
// i.e. x <= 0xffffffff.
static inline bool is_uint32(uint64_t x) {
  // In range exactly when the upper 32 bits are all zero.
  return (x >> 32) == 0;
}
67
68// CPU Registers.
69//
70// 1) We would prefer to use an enum, but enum values are assignment-
71// compatible with int, which has caused code-generation bugs.
72//
73// 2) We would prefer to use a class instead of a struct but we don't like
74// the register initialization to depend on the particular initialization
75// order (which appears to be different on OS X, Linux, and Windows for the
76// installed versions of C++ we tried). Using a struct permits C-style
77// "initialization". Also, the Register objects cannot be const as this
78// forces initialization stubs in MSVC, making us dependent on initialization
79// order.
80//
81// 3) By not using an enum, we are possibly preventing the compiler from
82// doing certain constant folds, which may significantly reduce the
83// code generated for some assembly instructions (because they boil down
84// to a few constants). If this is a problem, we could change the code
85// such that we use an enum in optimized mode, and the struct in debug
86// mode. This way we get the compile-time error checking in debug mode
87// and best performance in optimized code.
88//
89
90struct Register {
91  // The non-allocatable registers are:
92  //  rsp - stack pointer
93  //  rbp - frame pointer
94  //  rsi - context register
95  //  r10 - fixed scratch register
96  //  r13 - root register
97  //  r15 - smi constant register
98  static const int kNumRegisters = 16;
99  static const int kNumAllocatableRegisters = 10;
100
101  static int ToAllocationIndex(Register reg) {
102    return kAllocationIndexByRegisterCode[reg.code()];
103  }
104
105  static Register FromAllocationIndex(int index) {
106    ASSERT(index >= 0 && index < kNumAllocatableRegisters);
107    Register result = { kRegisterCodeByAllocationIndex[index] };
108    return result;
109  }
110
111  static const char* AllocationIndexToString(int index) {
112    ASSERT(index >= 0 && index < kNumAllocatableRegisters);
113    const char* const names[] = {
114      "rax",
115      "rbx",
116      "rdx",
117      "rcx",
118      "rdi",
119      "r8",
120      "r9",
121      "r11",
122      "r14",
123      "r12"
124    };
125    return names[index];
126  }
127
128  static Register toRegister(int code) {
129    Register r = { code };
130    return r;
131  }
132  bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
133  bool is(Register reg) const { return code_ == reg.code_; }
134  int code() const {
135    ASSERT(is_valid());
136    return code_;
137  }
138  int bit() const {
139    return 1 << code_;
140  }
141
142  // Return the high bit of the register code as a 0 or 1.  Used often
143  // when constructing the REX prefix byte.
144  int high_bit() const {
145    return code_ >> 3;
146  }
147  // Return the 3 low bits of the register code.  Used when encoding registers
148  // in modR/M, SIB, and opcode bytes.
149  int low_bits() const {
150    return code_ & 0x7;
151  }
152
153  // Unfortunately we can't make this private in a struct when initializing
154  // by assignment.
155  int code_;
156
157 private:
158  static const int kRegisterCodeByAllocationIndex[kNumAllocatableRegisters];
159  static const int kAllocationIndexByRegisterCode[kNumRegisters];
160};
161
// General-purpose register constants.  Each constant's code_ is the value in
// its initializer (0 through 15); that code is what Register::high_bit() and
// Register::low_bits() split apart.
const Register rax = { 0 };
const Register rcx = { 1 };
const Register rdx = { 2 };
const Register rbx = { 3 };
const Register rsp = { 4 };
const Register rbp = { 5 };
const Register rsi = { 6 };
const Register rdi = { 7 };
const Register r8 = { 8 };
const Register r9 = { 9 };
const Register r10 = { 10 };
const Register r11 = { 11 };
const Register r12 = { 12 };
const Register r13 = { 13 };
const Register r14 = { 14 };
const Register r15 = { 15 };
// Sentinel for "no register": its code (-1) fails Register::is_valid().
const Register no_reg = { -1 };
179
180
181struct XMMRegister {
182  static const int kNumRegisters = 16;
183  static const int kNumAllocatableRegisters = 15;
184
185  static int ToAllocationIndex(XMMRegister reg) {
186    ASSERT(reg.code() != 0);
187    return reg.code() - 1;
188  }
189
190  static XMMRegister FromAllocationIndex(int index) {
191    ASSERT(0 <= index && index < kNumAllocatableRegisters);
192    XMMRegister result = { index + 1 };
193    return result;
194  }
195
196  static const char* AllocationIndexToString(int index) {
197    ASSERT(index >= 0 && index < kNumAllocatableRegisters);
198    const char* const names[] = {
199      "xmm1",
200      "xmm2",
201      "xmm3",
202      "xmm4",
203      "xmm5",
204      "xmm6",
205      "xmm7",
206      "xmm8",
207      "xmm9",
208      "xmm10",
209      "xmm11",
210      "xmm12",
211      "xmm13",
212      "xmm14",
213      "xmm15"
214    };
215    return names[index];
216  }
217
218  bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
219  bool is(XMMRegister reg) const { return code_ == reg.code_; }
220  int code() const {
221    ASSERT(is_valid());
222    return code_;
223  }
224
225  // Return the high bit of the register code as a 0 or 1.  Used often
226  // when constructing the REX prefix byte.
227  int high_bit() const {
228    return code_ >> 3;
229  }
230  // Return the 3 low bits of the register code.  Used when encoding registers
231  // in modR/M, SIB, and opcode bytes.
232  int low_bits() const {
233    return code_ & 0x7;
234  }
235
236  int code_;
237};
238
// SSE register constants; each constant's code_ is the value in its
// initializer.  xmm0 has code 0, which XMMRegister::ToAllocationIndex()
// asserts against, so it has no allocation index.
const XMMRegister xmm0 = { 0 };
const XMMRegister xmm1 = { 1 };
const XMMRegister xmm2 = { 2 };
const XMMRegister xmm3 = { 3 };
const XMMRegister xmm4 = { 4 };
const XMMRegister xmm5 = { 5 };
const XMMRegister xmm6 = { 6 };
const XMMRegister xmm7 = { 7 };
const XMMRegister xmm8 = { 8 };
const XMMRegister xmm9 = { 9 };
const XMMRegister xmm10 = { 10 };
const XMMRegister xmm11 = { 11 };
const XMMRegister xmm12 = { 12 };
const XMMRegister xmm13 = { 13 };
const XMMRegister xmm14 = { 14 };
const XMMRegister xmm15 = { 15 };


// Second name for XMMRegister.
typedef XMMRegister DoubleRegister;
258
259
// Condition codes.  The values 0-15 are OR-ed directly into opcode bytes
// (for example Assembler::kJncShortOpcode is kJccShortPrefix | not_carry).
// Each condition and its opposite form an even/odd pair that differs only in
// the lowest bit, which is the bit NegateCondition() flips.
enum Condition {
  // any value < 0 is considered no_condition
  no_condition  = -1,

  overflow      =  0,
  no_overflow   =  1,
  below         =  2,
  above_equal   =  3,
  equal         =  4,
  not_equal     =  5,
  below_equal   =  6,
  above         =  7,
  negative      =  8,
  positive      =  9,
  parity_even   = 10,
  parity_odd    = 11,
  less          = 12,
  greater_equal = 13,
  less_equal    = 14,
  greater       = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always        = 16,
  never         = 17,
  // aliases
  carry         = below,
  not_carry     = above_equal,
  zero          = equal,
  not_zero      = not_equal,
  sign          = negative,
  not_sign      = positive,
  // Highest-valued real condition (15); always/never lie above it.
  last_condition = greater
};
294
295
296// Returns the equivalent of !cc.
297// Negation of the default no_condition (-1) results in a non-default
298// no_condition value (-2). As long as tests for no_condition check
299// for condition < 0, this will work as expected.
300inline Condition NegateCondition(Condition cc) {
301  return static_cast<Condition>(cc ^ 1);
302}
303
304
305// Corresponds to transposing the operands of a comparison.
306inline Condition ReverseCondition(Condition cc) {
307  switch (cc) {
308    case below:
309      return above;
310    case above:
311      return below;
312    case above_equal:
313      return below_equal;
314    case below_equal:
315      return above_equal;
316    case less:
317      return greater;
318    case greater:
319      return less;
320    case greater_equal:
321      return less_equal;
322    case less_equal:
323      return greater_equal;
324    default:
325      return cc;
326  };
327}
328
329
// Branch hints attached to conditional jumps.
// NOTE(review): 0x2e and 0x3e look like the x86 branch-hint prefix bytes
// (not taken / taken) - confirm against the code that emits them.
enum Hint {
  no_hint = 0,
  not_taken = 0x2e,
  taken = 0x3e
};
335
336// The result of negating a hint is as if the corresponding condition
337// were negated by NegateCondition.  That is, no_hint is mapped to
338// itself and not_taken and taken are mapped to each other.
339inline Hint NegateHint(Hint hint) {
340  return (hint == no_hint)
341      ? no_hint
342      : ((hint == not_taken) ? taken : not_taken);
343}
344
345
346// -----------------------------------------------------------------------------
347// Machine instruction Immediates
348
// Wraps a signed 32-bit immediate value for use as an instruction operand.
// The constructor is explicit, so a plain integer is never converted
// implicitly, and the stored value is readable only by Assembler (a friend).
class Immediate BASE_EMBEDDED {
 public:
  explicit Immediate(int32_t value) : value_(value) {}

 private:
  int32_t value_;

  friend class Assembler;
};
358
359
360// -----------------------------------------------------------------------------
361// Machine instruction Operands
362
// Multiplier applied to the index register of a memory operand.  Each
// enumerator's value is log2 of the multiplier it names.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  // Aliases for the two sizes named here: same values as times_4 / times_8.
  times_int_size = times_4,
  times_pointer_size = times_8
};
371
372
// Describes the memory-addressing part of an instruction.  The encoding is
// built up in rex_/buf_/len_ by the private set_* helpers; the constructors
// and helpers are only declared here.  Assembler is a friend and reads the
// encoded bytes.
class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

 private:
  // REX prefix bits contributed by this operand.
  byte rex_;
  // Encoded operand bytes.  NOTE(review): 6 presumably covers ModR/M + SIB +
  // a 32-bit displacement - confirm against the set_* implementations.
  byte buf_[6];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);

  friend class Assembler;
};
419
420
421// CpuFeatures keeps track of which features are supported by the target CPU.
422// Supported features must be enabled by a Scope before use.
423// Example:
424//   if (CpuFeatures::IsSupported(SSE3)) {
425//     CpuFeatures::Scope fscope(SSE3);
426//     // Generate SSE3 floating point code.
427//   } else {
428//     // Generate standard x87 or SSE2 floating point code.
429//   }
430class CpuFeatures : public AllStatic {
431 public:
432  // Detect features of the target CPU. Set safe defaults if the serializer
433  // is enabled (snapshots must be portable).
434  static void Probe(bool portable);
435  // Check whether a feature is supported by the target CPU.
436  static bool IsSupported(CpuFeature f) {
437    if (f == SSE2 && !FLAG_enable_sse2) return false;
438    if (f == SSE3 && !FLAG_enable_sse3) return false;
439    if (f == CMOV && !FLAG_enable_cmov) return false;
440    if (f == RDTSC && !FLAG_enable_rdtsc) return false;
441    if (f == SAHF && !FLAG_enable_sahf) return false;
442    return (supported_ & (V8_UINT64_C(1) << f)) != 0;
443  }
444  // Check whether a feature is currently enabled.
445  static bool IsEnabled(CpuFeature f) {
446    return (enabled_ & (V8_UINT64_C(1) << f)) != 0;
447  }
448  // Enable a specified feature within a scope.
449  class Scope BASE_EMBEDDED {
450#ifdef DEBUG
451   public:
452    explicit Scope(CpuFeature f) {
453      uint64_t mask = (V8_UINT64_C(1) << f);
454      ASSERT(CpuFeatures::IsSupported(f));
455      ASSERT(!Serializer::enabled() || (found_by_runtime_probing_ & mask) == 0);
456      old_enabled_ = CpuFeatures::enabled_;
457      CpuFeatures::enabled_ |= mask;
458    }
459    ~Scope() { CpuFeatures::enabled_ = old_enabled_; }
460   private:
461    uint64_t old_enabled_;
462#else
463   public:
464    explicit Scope(CpuFeature f) {}
465#endif
466  };
467 private:
468  // Safe defaults include SSE2 and CMOV for X64. It is always available, if
469  // anyone checks, but they shouldn't need to check.
470  static const uint64_t kDefaultCpuFeatures = (1 << SSE2 | 1 << CMOV);
471  static uint64_t supported_;
472  static uint64_t enabled_;
473  static uint64_t found_by_runtime_probing_;
474};
475
476
477class Assembler : public Malloced {
478 private:
479  // We check before assembling an instruction that there is sufficient
480  // space to write an instruction and its relocation information.
481  // The relocation writer's position must be kGap bytes above the end of
482  // the generated instructions. This leaves enough space for the
483  // longest possible x64 instruction, 15 bytes, and the longest possible
484  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
485  // (There is a 15 byte limit on x64 instruction length that rules out some
486  // otherwise valid instructions.)
487  // This allows for a single, fast space check per instruction.
488  static const int kGap = 32;
489
490 public:
491  // Create an assembler. Instructions and relocation information are emitted
492  // into a buffer, with the instructions starting from the beginning and the
493  // relocation information starting from the end of the buffer. See CodeDesc
494  // for a detailed comment on the layout (globals.h).
495  //
496  // If the provided buffer is NULL, the assembler allocates and grows its own
497  // buffer, and buffer_size determines the initial buffer size. The buffer is
498  // owned by the assembler and deallocated upon destruction of the assembler.
499  //
500  // If the provided buffer is not NULL, the assembler uses the provided buffer
501  // for code generation and assumes its size to be buffer_size. If the buffer
502  // is too small, a fatal error occurs. No deallocation of the buffer is done
503  // upon destruction of the assembler.
504  Assembler(void* buffer, int buffer_size);
505  ~Assembler();
506
507  // GetCode emits any pending (non-emitted) code and fills the descriptor
508  // desc. GetCode() is idempotent; it returns the same result if no other
509  // Assembler functions are invoked in between GetCode() calls.
510  void GetCode(CodeDesc* desc);
511
512  // Read/Modify the code target in the relative branch/call instruction at pc.
513  // On the x64 architecture, we use relative jumps with a 32-bit displacement
514  // to jump to other Code objects in the Code space in the heap.
515  // Jumps to C functions are done indirectly through a 64-bit register holding
516  // the absolute address of the target.
517  // These functions convert between absolute Addresses of Code objects and
518  // the relative displacements stored in the code.
519  static inline Address target_address_at(Address pc);
520  static inline void set_target_address_at(Address pc, Address target);
521
  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  // Simply forwards both arguments to set_target_address_at().
  inline static void set_target_at(Address instruction_payload,
                                   Address target) {
    set_target_address_at(instruction_payload, target);
  }
528
  // This sets the branch destination (which is a load instruction on x64).
  // This is for calls and branches to runtime code.
  // Stores |target| as a full Address value at |instruction_payload|, in
  // contrast to set_target_at(), which goes through set_target_address_at().
  inline static void set_external_target_at(Address instruction_payload,
                                            Address target) {
    *reinterpret_cast<Address*>(instruction_payload) = target;
  }
535
536  inline Handle<Object> code_target_object_handle_at(Address pc);
537  // Number of bytes taken up by the branch target in the code.
538  static const int kCallTargetSize = 4;      // Use 32-bit displacement.
539  static const int kExternalTargetSize = 8;  // Use 64-bit absolute.
540  // Distance between the address of the code target in the call instruction
541  // and the return address pushed on the stack.
542  static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
543  // Distance between the start of the JS return sequence and where the
544  // 32-bit displacement of a near call would be, relative to the pushed
545  // return address.  TODO: Use return sequence length instead.
546  // Should equal Debug::kX64JSReturnSequenceLength - kCallTargetAddressOffset;
547  static const int kPatchReturnSequenceAddressOffset = 13 - 4;
548  // Distance between start of patched debug break slot and where the
549  // 32-bit displacement of a near call would be, relative to the pushed
550  // return address.  TODO: Use return sequence length instead.
551  // Should equal Debug::kX64JSReturnSequenceLength - kCallTargetAddressOffset;
552  static const int kPatchDebugBreakSlotAddressOffset = 13 - 4;
553  // TODO(X64): Rename this, removing the "Real", after changing the above.
554  static const int kRealPatchReturnSequenceAddressOffset = 2;
555
556  // Some x64 JS code is padded with int3 to make it large
557  // enough to hold an instruction when the debugger patches it.
558  static const int kJumpInstructionLength = 13;
559  static const int kCallInstructionLength = 13;
560  static const int kJSReturnSequenceLength = 13;
561  static const int kShortCallInstructionLength = 5;
562
563  // The debug break slot must be able to contain a call instruction.
564  static const int kDebugBreakSlotLength = kCallInstructionLength;
565
566  // One byte opcode for test eax,0xXXXXXXXX.
567  static const byte kTestEaxByte = 0xA9;
568  // One byte opcode for test al, 0xXX.
569  static const byte kTestAlByte = 0xA8;
570  // One byte opcode for nop.
571  static const byte kNopByte = 0x90;
572
573  // One byte prefix for a short conditional jump.
574  static const byte kJccShortPrefix = 0x70;
575  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
576  static const byte kJcShortOpcode = kJccShortPrefix | carry;
577
578
579
580  // ---------------------------------------------------------------------------
581  // Code generation
582  //
583  // Function names correspond one-to-one to x64 instruction mnemonics.
584  // Unless specified otherwise, instructions operate on 64-bit operands.
585  //
586  // If we need versions of an assembly instruction that operate on different
587  // width arguments, we add a single-letter suffix specifying the width.
588  // This is done for the following instructions: mov, cmp, inc, dec,
589  // add, sub, and test.
590  // There are no versions of these instructions without the suffix.
591  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
592  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
593  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
594  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
595  //
596  // Some mnemonics, such as "and", are the same as C++ keywords.
597  // Naming conflicts with C++ keywords are resolved by adding a trailing '_'.
598
599  // Insert the smallest number of nop instructions
600  // possible to align the pc offset to a multiple
601  // of m, where m must be a power of 2.
602  void Align(int m);
603  // Aligns code to something that's optimal for a jump target for the platform.
604  void CodeTargetAlign();
605
606  // Stack
607  void pushfq();
608  void popfq();
609
610  void push(Immediate value);
611  // Push a 32 bit integer, and guarantee that it is actually pushed as a
612  // 32 bit value, the normal push will optimize the 8 bit case.
613  void push_imm32(int32_t imm32);
614  void push(Register src);
615  void push(const Operand& src);
616
617  void pop(Register dst);
618  void pop(const Operand& dst);
619
620  void enter(Immediate size);
621  void leave();
622
623  // Moves
624  void movb(Register dst, const Operand& src);
625  void movb(Register dst, Immediate imm);
626  void movb(const Operand& dst, Register src);
627
628  // Move the low 16 bits of a 64-bit register value to a 16-bit
629  // memory location.
630  void movw(const Operand& dst, Register src);
631
632  void movl(Register dst, Register src);
633  void movl(Register dst, const Operand& src);
634  void movl(const Operand& dst, Register src);
635  void movl(const Operand& dst, Immediate imm);
636  // Load a 32-bit immediate value, zero-extended to 64 bits.
637  void movl(Register dst, Immediate imm32);
638
639  // Move 64 bit register value to 64-bit memory location.
640  void movq(const Operand& dst, Register src);
641  // Move 64 bit memory location to 64-bit register value.
642  void movq(Register dst, const Operand& src);
643  void movq(Register dst, Register src);
644  // Sign extends immediate 32-bit value to 64 bits.
645  void movq(Register dst, Immediate x);
646  // Move the offset of the label location relative to the current
647  // position (after the move) to the destination.
648  void movl(const Operand& dst, Label* src);
649
650  // Move sign extended immediate to memory location.
651  void movq(const Operand& dst, Immediate value);
652  // New x64 instructions to load a 64-bit immediate into a register.
653  // All 64-bit immediates must have a relocation mode.
654  void movq(Register dst, void* ptr, RelocInfo::Mode rmode);
655  void movq(Register dst, int64_t value, RelocInfo::Mode rmode);
656  void movq(Register dst, const char* s, RelocInfo::Mode rmode);
657  // Moves the address of the external reference into the register.
658  void movq(Register dst, ExternalReference ext);
659  void movq(Register dst, Handle<Object> handle, RelocInfo::Mode rmode);
660
661  void movsxbq(Register dst, const Operand& src);
662  void movsxwq(Register dst, const Operand& src);
663  void movsxlq(Register dst, Register src);
664  void movsxlq(Register dst, const Operand& src);
665  void movzxbq(Register dst, const Operand& src);
666  void movzxbl(Register dst, const Operand& src);
667  void movzxwq(Register dst, const Operand& src);
668  void movzxwl(Register dst, const Operand& src);
669
670  // Repeated moves.
671
672  void repmovsb();
673  void repmovsw();
674  void repmovsl();
675  void repmovsq();
676
677  // New x64 instruction to load from an immediate 64-bit pointer into RAX.
678  void load_rax(void* ptr, RelocInfo::Mode rmode);
679  void load_rax(ExternalReference ext);
680
681  // Conditional moves.
682  void cmovq(Condition cc, Register dst, Register src);
683  void cmovq(Condition cc, Register dst, const Operand& src);
684  void cmovl(Condition cc, Register dst, Register src);
685  void cmovl(Condition cc, Register dst, const Operand& src);
686
687  // Exchange two registers
688  void xchg(Register dst, Register src);
689
690  // Arithmetics
  // 32-bit add ('l' suffix).  Register and memory sources forward to
  // arithmetic_op_32 with opcode byte 0x03.
  void addl(Register dst, Register src) {
    arithmetic_op_32(0x03, dst, src);
  }

  // Immediate source: forwards to immediate_arithmetic_op_32 with 0x0 as its
  // first argument (presumably the sub-opcode selecting ADD - confirm).
  void addl(Register dst, Immediate src) {
    immediate_arithmetic_op_32(0x0, dst, src);
  }

  void addl(Register dst, const Operand& src) {
    arithmetic_op_32(0x03, dst, src);
  }

  void addl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x0, dst, src);
  }
706
  // 64-bit add ('q' suffix).  Register-destination forms use opcode byte
  // 0x03 with (dst, src).
  void addq(Register dst, Register src) {
    arithmetic_op(0x03, dst, src);
  }

  void addq(Register dst, const Operand& src) {
    arithmetic_op(0x03, dst, src);
  }

  // Memory destination: uses opcode byte 0x01 and passes the register first,
  // so the argument order is (src, dst).
  void addq(const Operand& dst, Register src) {
    arithmetic_op(0x01, src, dst);
  }

  // Immediate forms forward to immediate_arithmetic_op with 0x0 as the first
  // argument.
  void addq(Register dst, Immediate src) {
    immediate_arithmetic_op(0x0, dst, src);
  }

  void addq(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x0, dst, src);
  }
726
  // sbb, register-to-register, 32-bit form: opcode byte 0x1b through
  // arithmetic_op_32.
  void sbbl(Register dst, Register src) {
    arithmetic_op_32(0x1b, dst, src);
  }

  // sbb, register-to-register, 64-bit form: same opcode byte through
  // arithmetic_op.
  void sbbq(Register dst, Register src) {
    arithmetic_op(0x1b, dst, src);
  }
734
  // 8-bit compare ('b' suffix).  Immediate forms forward to
  // immediate_arithmetic_op_8 with 0x7 as the first argument.
  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  // Declared only; the name suggests a form specific to al.
  void cmpb_al(Immediate src);

  // NOTE(review): the register and memory forms go through the generic
  // arithmetic_op rather than an 8-bit-specific helper - confirm that the
  // prefix it emits is what is intended for byte compares.
  void cmpb(Register dst, Register src) {
    arithmetic_op(0x3A, dst, src);
  }

  void cmpb(Register dst, const Operand& src) {
    arithmetic_op(0x3A, dst, src);
  }

  // Memory destination: opcode byte 0x38, register passed first.
  void cmpb(const Operand& dst, Register src) {
    arithmetic_op(0x38, src, dst);
  }

  void cmpb(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }
756
  // 16-bit compare ('w' suffix).  Immediate forms forward to
  // immediate_arithmetic_op_16 with 0x7 as the first argument.
  void cmpw(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  // Register-destination forms: opcode byte 0x3B through arithmetic_op_16.
  void cmpw(Register dst, const Operand& src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(Register dst, Register src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  // Memory destination: opcode byte 0x39, register passed first.
  void cmpw(const Operand& dst, Register src) {
    arithmetic_op_16(0x39, src, dst);
  }
776
  // 32-bit compare ('l' suffix).  Register-destination forms: opcode byte
  // 0x3B through arithmetic_op_32.
  void cmpl(Register dst, Register src) {
    arithmetic_op_32(0x3B, dst, src);
  }

  void cmpl(Register dst, const Operand& src) {
    arithmetic_op_32(0x3B, dst, src);
  }

  // Memory destination: opcode byte 0x39, register passed first.
  void cmpl(const Operand& dst, Register src) {
    arithmetic_op_32(0x39, src, dst);
  }

  // Immediate forms forward to immediate_arithmetic_op_32 with 0x7 as the
  // first argument.
  void cmpl(Register dst, Immediate src) {
    immediate_arithmetic_op_32(0x7, dst, src);
  }

  void cmpl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x7, dst, src);
  }
796
  // 64-bit compare ('q' suffix).  Register-destination forms: opcode byte
  // 0x3B through arithmetic_op.
  void cmpq(Register dst, Register src) {
    arithmetic_op(0x3B, dst, src);
  }

  void cmpq(Register dst, const Operand& src) {
    arithmetic_op(0x3B, dst, src);
  }

  // Memory destination: opcode byte 0x39, register passed first.
  void cmpq(const Operand& dst, Register src) {
    arithmetic_op(0x39, src, dst);
  }

  // Immediate forms forward to immediate_arithmetic_op with 0x7 as the first
  // argument.
  void cmpq(Register dst, Immediate src) {
    immediate_arithmetic_op(0x7, dst, src);
  }

  void cmpq(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x7, dst, src);
  }
816
  // Bitwise and on 64-bit operands (trailing '_' avoids the C++ keyword).
  // Register-destination forms: opcode byte 0x23 through arithmetic_op.
  void and_(Register dst, Register src) {
    arithmetic_op(0x23, dst, src);
  }

  void and_(Register dst, const Operand& src) {
    arithmetic_op(0x23, dst, src);
  }

  // Memory destination: opcode byte 0x21, register passed first.
  void and_(const Operand& dst, Register src) {
    arithmetic_op(0x21, src, dst);
  }

  // Immediate forms forward to immediate_arithmetic_op with 0x4 as the first
  // argument.
  void and_(Register dst, Immediate src) {
    immediate_arithmetic_op(0x4, dst, src);
  }

  void and_(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x4, dst, src);
  }
836
  // 32-bit and with an immediate: immediate_arithmetic_op_32, first
  // argument 0x4.
  void andl(Register dst, Immediate src) {
    immediate_arithmetic_op_32(0x4, dst, src);
  }

  // 32-bit and, register destination: opcode byte 0x23 through
  // arithmetic_op_32.
  void andl(Register dst, Register src) {
    arithmetic_op_32(0x23, dst, src);
  }

  void andl(Register dst, const Operand& src) {
    arithmetic_op_32(0x23, dst, src);
  }

  // 8-bit and with an immediate: immediate_arithmetic_op_8, first
  // argument 0x4.
  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);
  }
852
853  void decq(Register dst);
854  void decq(const Operand& dst);
855  void decl(Register dst);
856  void decl(const Operand& dst);
857  void decb(Register dst);
858  void decb(const Operand& dst);
859
860  // Sign-extends rax into rdx:rax.
861  void cqo();
862  // Sign-extends eax into edx:eax.
863  void cdq();
864
865  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx.
866  void idivq(Register src);
867  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, rem. in edx.
868  void idivl(Register src);
869
870  // Signed multiply instructions.
871  void imul(Register src);                               // rdx:rax = rax * src.
872  void imul(Register dst, Register src);                 // dst = dst * src.
873  void imul(Register dst, const Operand& src);           // dst = dst * src.
874  void imul(Register dst, Register src, Immediate imm);  // dst = src * imm.
875  // Signed 32-bit multiply instructions.
876  void imull(Register dst, Register src);                 // dst = dst * src.
877  void imull(Register dst, const Operand& src);           // dst = dst * src.
878  void imull(Register dst, Register src, Immediate imm);  // dst = src * imm.
879
880  void incq(Register dst);
881  void incq(const Operand& dst);
882  void incl(Register dst);
883  void incl(const Operand& dst);
884
885  void lea(Register dst, const Operand& src);
886  void leal(Register dst, const Operand& src);
887
888  // Multiply rax by src, put the result in rdx:rax.
889  void mul(Register src);
890
891  void neg(Register dst);
892  void neg(const Operand& dst);
893  void negl(Register dst);
894
895  void not_(Register dst);
896  void not_(const Operand& dst);
897  void notl(Register dst);
898
  // Bitwise or.  or_ is the 64-bit form (trailing '_' avoids the C++
  // keyword) and orl the 32-bit form; each pair below differs only in
  // calling the plain or the _32 helper.
  // Register-destination forms use opcode byte 0x0B.
  void or_(Register dst, Register src) {
    arithmetic_op(0x0B, dst, src);
  }

  void orl(Register dst, Register src) {
    arithmetic_op_32(0x0B, dst, src);
  }

  void or_(Register dst, const Operand& src) {
    arithmetic_op(0x0B, dst, src);
  }

  void orl(Register dst, const Operand& src) {
    arithmetic_op_32(0x0B, dst, src);
  }

  // Memory destination: opcode byte 0x09, register passed first.  Only the
  // 64-bit form is defined here.
  void or_(const Operand& dst, Register src) {
    arithmetic_op(0x09, src, dst);
  }

  // Immediate forms pass 0x1 as the first argument.
  void or_(Register dst, Immediate src) {
    immediate_arithmetic_op(0x1, dst, src);
  }

  void orl(Register dst, Immediate src) {
    immediate_arithmetic_op_32(0x1, dst, src);
  }

  void or_(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x1, dst, src);
  }

  void orl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x1, dst, src);
  }
934
935
936  void rcl(Register dst, Immediate imm8) {
937    shift(dst, imm8, 0x2);
938  }
939
940  void rol(Register dst, Immediate imm8) {
941    shift(dst, imm8, 0x0);
942  }
943
944  void rcr(Register dst, Immediate imm8) {
945    shift(dst, imm8, 0x3);
946  }
947
948  void ror(Register dst, Immediate imm8) {
949    shift(dst, imm8, 0x1);
950  }
951
952  // Shifts dst:src left by cl bits, affecting only dst.
  // The shift count is taken from cl; no immediate-count overload is
  // declared alongside it.
953  void shld(Register dst, Register src);
954
955  // Shifts src:dst right by cl bits, affecting only dst.
  // As with shld, the count comes from cl.
956  void shrd(Register dst, Register src);
957
958  // Shifts dst right, duplicating sign bit, by shift_amount bits.
959  // Shifting by 1 is handled efficiently.
960  void sar(Register dst, Immediate shift_amount) {
961    shift(dst, shift_amount, 0x7);
962  }
963
964  // Shifts dst right, duplicating sign bit, by shift_amount bits.
965  // Shifting by 1 is handled efficiently.
966  void sarl(Register dst, Immediate shift_amount) {
967    shift_32(dst, shift_amount, 0x7);
968  }
969
970  // Shifts dst right, duplicating sign bit, by cl % 64 bits.
971  void sar_cl(Register dst) {
972    shift(dst, 0x7);
973  }
974
975  // Shifts dst right, duplicating sign bit, by cl % 64 bits.
976  void sarl_cl(Register dst) {
977    shift_32(dst, 0x7);
978  }
979
980  void shl(Register dst, Immediate shift_amount) {
981    shift(dst, shift_amount, 0x4);
982  }
983
984  void shl_cl(Register dst) {
985    shift(dst, 0x4);
986  }
987
988  void shll_cl(Register dst) {
989    shift_32(dst, 0x4);
990  }
991
992  void shll(Register dst, Immediate shift_amount) {
993    shift_32(dst, shift_amount, 0x4);
994  }
995
996  void shr(Register dst, Immediate shift_amount) {
997    shift(dst, shift_amount, 0x5);
998  }
999
1000  void shr_cl(Register dst) {
1001    shift(dst, 0x5);
1002  }
1003
1004  void shrl_cl(Register dst) {
1005    shift_32(dst, 0x5);
1006  }
1007
1008  void shrl(Register dst, Immediate shift_amount) {
1009    shift_32(dst, shift_amount, 0x5);
1010  }
1011
1012  void store_rax(void* dst, RelocInfo::Mode mode);
  // NOTE(review): both overloads presumably store rax to an absolute address
  // (dst, or the address behind ref), with mode being the relocation mode
  // recorded for that address -- confirm against the definitions.
1013  void store_rax(ExternalReference ref);
1014
1015  void subq(Register dst, Register src) {
1016    arithmetic_op(0x2B, dst, src);
1017  }
1018
1019  void subq(Register dst, const Operand& src) {
1020    arithmetic_op(0x2B, dst, src);
1021  }
1022
1023  void subq(const Operand& dst, Register src) {
1024    arithmetic_op(0x29, src, dst);
1025  }
1026
1027  void subq(Register dst, Immediate src) {
1028    immediate_arithmetic_op(0x5, dst, src);
1029  }
1030
1031  void subq(const Operand& dst, Immediate src) {
1032    immediate_arithmetic_op(0x5, dst, src);
1033  }
1034
1035  void subl(Register dst, Register src) {
1036    arithmetic_op_32(0x2B, dst, src);
1037  }
1038
1039  void subl(Register dst, const Operand& src) {
1040    arithmetic_op_32(0x2B, dst, src);
1041  }
1042
1043  void subl(const Operand& dst, Immediate src) {
1044    immediate_arithmetic_op_32(0x5, dst, src);
1045  }
1046
1047  void subl(Register dst, Immediate src) {
1048    immediate_arithmetic_op_32(0x5, dst, src);
1049  }
1050
1051  void subb(Register dst, Immediate src) {
1052    immediate_arithmetic_op_8(0x5, dst, src);
1053  }
1054
1055  void testb(Register dst, Register src);
  // NOTE(review): the test* family presumably emits the x86 TEST instruction
  // (flags set from the AND of the operands, no result stored), with the
  // b/l/q suffix selecting 8-/32-/64-bit operand size -- confirm against the
  // definitions.
1056  void testb(Register reg, Immediate mask);
1057  void testb(const Operand& op, Immediate mask);
1058  void testb(const Operand& op, Register reg);
1059  void testl(Register dst, Register src);
1060  void testl(Register reg, Immediate mask);
1061  void testl(const Operand& op, Immediate mask);
1062  void testq(const Operand& op, Register reg);
1063  void testq(Register dst, Register src);
1064  void testq(Register dst, Immediate mask);
1065
1066  void xor_(Register dst, Register src) {
1067    if (dst.code() == src.code()) {
1068      arithmetic_op_32(0x33, dst, src);
1069    } else {
1070      arithmetic_op(0x33, dst, src);
1071    }
1072  }
1073
1074  void xorl(Register dst, Register src) {
1075    arithmetic_op_32(0x33, dst, src);
1076  }
1077
1078  void xorl(Register dst, const Operand& src) {
1079    arithmetic_op_32(0x33, dst, src);
1080  }
1081
1082  void xorl(Register dst, Immediate src) {
1083    immediate_arithmetic_op_32(0x6, dst, src);
1084  }
1085
1086  void xorl(const Operand& dst, Immediate src) {
1087    immediate_arithmetic_op_32(0x6, dst, src);
1088  }
1089
1090  void xor_(Register dst, const Operand& src) {
1091    arithmetic_op(0x33, dst, src);
1092  }
1093
1094  void xor_(const Operand& dst, Register src) {
1095    arithmetic_op(0x31, src, dst);
1096  }
1097
1098  void xor_(Register dst, Immediate src) {
1099    immediate_arithmetic_op(0x6, dst, src);
1100  }
1101
1102  void xor_(const Operand& dst, Immediate src) {
1103    immediate_arithmetic_op(0x6, dst, src);
1104  }
1105
1106  // Bit operations.
  // NOTE(review): presumably the x86 BT (bit test) and BTS (bit test and
  // set) instructions, with src supplying the bit index -- confirm.
1107  void bt(const Operand& dst, Register src);
1108  void bts(const Operand& dst, Register src);
1109
1110  // Miscellaneous
1111  void clc();
1112  void cpuid();
1113  void hlt();
1114  void int3();
1115  void nop();
  // NOTE(review): presumably emits n bytes' worth of nops -- confirm.
1116  void nop(int n);
1117  void rdtsc();
  // NOTE(review): imm16 is presumably the number of stack bytes released by
  // the return -- confirm.
1118  void ret(int imm16);
1119  void setcc(Condition cc, Register reg);
1120
1121  // Label operations & relative jumps (PPUM Appendix D)
1122  //
1123  // Takes a branch opcode (cc) and a label (L) and generates
1124  // either a backward branch or a forward branch and links it
1125  // to the label fixup chain. Usage:
1126  //
1127  // Label L;    // unbound label
1128  // j(cc, &L);  // forward branch to unbound label
1129  // bind(&L);   // bind label to the current pc
1130  // j(cc, &L);  // backward branch to bound label
1131  // bind(&L);   // illegal: a label may be bound only once
1132  //
1133  // Note: The same Label can be used for forward and backward branches
1134  // but it may be bound only once.
1135
1136  void bind(Label* L);  // binds an unbound label L to the current code position
  // Overload for NearLabel, the label type accepted by the short-jump
  // overloads jmp(NearLabel*) and j(Condition, NearLabel*, Hint).
  // NOTE(review): presumably subject to the same bind-once rule -- confirm.
1137  void bind(NearLabel* L);
1138
1139  // Calls
1140  // Call near relative 32-bit displacement, relative to next instruction.
1141  void call(Label* L);
  // NOTE(review): rmode is presumably the relocation mode recorded for the
  // code target -- confirm against the definition.
1142  void call(Handle<Code> target, RelocInfo::Mode rmode);
1143
1144  // Calls directly to the given address using a relative offset.
1145  // Should only ever be used in Code objects for calls within the
1146  // same Code object. Should not be used when generating new code (use labels),
1147  // but only when patching existing code.
1148  void call(Address target);
1149
1150  // Call near absolute indirect, address in register.
1151  void call(Register adr);
1152
1153  // Call near indirect, target address read from the memory operand.
1154  void call(const Operand& operand);
1155
1156  // Jumps
1157  // Jump short or near relative.
1158  // Use a 32-bit signed displacement.
1159  void jmp(Label* L);  // unconditional jump to L
1160  void jmp(Handle<Code> target, RelocInfo::Mode rmode);
1161
1162  // Jump near absolute indirect (r64)
1163  void jmp(Register adr);
1164
1165  // Jump near absolute indirect (m64)
1166  void jmp(const Operand& src);
1167
1168  // Short jump; takes a NearLabel rather than a Label.
1169  void jmp(NearLabel* L);
1170
1171  // Conditional jumps
1172  void j(Condition cc, Label* L);
1173  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
1174
1175  // Conditional short jump; hint defaults to no_hint.
1176  void j(Condition cc, NearLabel* L, Hint hint = no_hint);
1177
1178  // Floating-point operations
  // NOTE(review): these appear to be x87 FPU instructions named after their
  // mnemonics.  The int argument presumably selects stack register st(i),
  // and the _s/_d suffixes presumably select the width of the memory
  // operand -- confirm against the definitions.
1179  void fld(int i);
1180
1181  void fld1();
1182  void fldz();
1183  void fldpi();
1184  void fldln2();
1185
1186  void fld_s(const Operand& adr);
1187  void fld_d(const Operand& adr);
1188
1189  void fstp_s(const Operand& adr);
1190  void fstp_d(const Operand& adr);
1191  void fstp(int index);
1192
1193  void fild_s(const Operand& adr);
1194  void fild_d(const Operand& adr);
1195
1196  void fist_s(const Operand& adr);
1197
1198  void fistp_s(const Operand& adr);
1199  void fistp_d(const Operand& adr);
1200
1201  void fisttp_s(const Operand& adr);
1202  void fisttp_d(const Operand& adr);
1203
1204  void fabs();
1205  void fchs();
1206
1207  void fadd(int i);
1208  void fsub(int i);
1209  void fmul(int i);
1210  void fdiv(int i);
1211
1212  void fisub_s(const Operand& adr);
1213
  // The popping arithmetic forms default their argument to 1.
1214  void faddp(int i = 1);
1215  void fsubp(int i = 1);
1216  void fsubrp(int i = 1);
1217  void fmulp(int i = 1);
1218  void fdivp(int i = 1);
1219  void fprem();
1220  void fprem1();
1221
  // fxch defaults its argument to 1, ffree to 0.
1222  void fxch(int i = 1);
1223  void fincstp();
1224  void ffree(int i = 0);
1225
1226  void ftst();
1227  void fucomp(int i);
1228  void fucompp();
1229  void fucomi(int i);
1230  void fucomip();
1231
1232  void fcompp();
1233  void fnstsw_ax();
1234  void fwait();
1235  void fnclex();
1236
1237  void fsin();
1238  void fcos();
1239  void fyl2x();
1240
1241  void frndint();
1242
1243  void sahf();
1244
1245  // SSE2 instructions
  // NOTE(review): the heading is looser than the contents.  Per the Intel
  // SDM, extractps is an SSE4.1 instruction and movss/cvttss2si belong to
  // SSE; callers presumably need a CPU feature check before using
  // extractps -- confirm.
1246  void movd(XMMRegister dst, Register src);
1247  void movd(Register dst, XMMRegister src);
1248  void movq(XMMRegister dst, Register src);
1249  void movq(Register dst, XMMRegister src);
1250  void extractps(Register dst, XMMRegister src, byte imm8);
1251
1252  void movsd(const Operand& dst, XMMRegister src);
1253  void movsd(XMMRegister dst, XMMRegister src);
1254  void movsd(XMMRegister dst, const Operand& src);
1255
1256  void movdqa(const Operand& dst, XMMRegister src);
1257  void movdqa(XMMRegister dst, const Operand& src);
1258
1259  void movss(XMMRegister dst, const Operand& src);
1260  void movss(const Operand& dst, XMMRegister src);
1261
  // Conversions from floating point to integer.
  // NOTE(review): the extra 't' presumably marks the truncating forms and a
  // trailing 'q' the 64-bit integer destination -- confirm.
1262  void cvttss2si(Register dst, const Operand& src);
1263  void cvttss2si(Register dst, XMMRegister src);
1264  void cvttsd2si(Register dst, const Operand& src);
1265  void cvttsd2si(Register dst, XMMRegister src);
1266  void cvttsd2siq(Register dst, XMMRegister src);
1267
  // Conversions from integer to floating point.
  // NOTE(review): 'l'/'q' presumably select a 32-/64-bit integer source --
  // confirm.
1268  void cvtlsi2sd(XMMRegister dst, const Operand& src);
1269  void cvtlsi2sd(XMMRegister dst, Register src);
1270  void cvtqsi2sd(XMMRegister dst, const Operand& src);
1271  void cvtqsi2sd(XMMRegister dst, Register src);
1272
1273  void cvtlsi2ss(XMMRegister dst, Register src);
1274
1275  void cvtss2sd(XMMRegister dst, XMMRegister src);
1276  void cvtss2sd(XMMRegister dst, const Operand& src);
1277  void cvtsd2ss(XMMRegister dst, XMMRegister src);
1278
1279  void cvtsd2si(Register dst, XMMRegister src);
1280  void cvtsd2siq(Register dst, XMMRegister src);
1281
1282  void addsd(XMMRegister dst, XMMRegister src);
1283  void subsd(XMMRegister dst, XMMRegister src);
1284  void mulsd(XMMRegister dst, XMMRegister src);
1285  void divsd(XMMRegister dst, XMMRegister src);
1286
1287  void andpd(XMMRegister dst, XMMRegister src);
1288  void orpd(XMMRegister dst, XMMRegister src);
1289  void xorpd(XMMRegister dst, XMMRegister src);
1290  void sqrtsd(XMMRegister dst, XMMRegister src);
1291
1292  void ucomisd(XMMRegister dst, XMMRegister src);
1293  void ucomisd(XMMRegister dst, const Operand& src);
1294
1295  void movmskpd(Register dst, XMMRegister src);
1296
1297  // The first argument is the reg field, the second argument is the r/m field.
  // Overloads cover XMM/XMM, XMM/memory, and both XMM/general-purpose
  // register pairings.
1298  void emit_sse_operand(XMMRegister dst, XMMRegister src);
1299  void emit_sse_operand(XMMRegister reg, const Operand& adr);
1300  void emit_sse_operand(XMMRegister dst, Register src);
1301  void emit_sse_operand(Register dst, XMMRegister src);
1302
1303  // Debugging
  // NOTE(review): defined out of line; what is printed and where is not
  // visible from this declaration.
1304  void Print();
1305
1306  // Check the code size generated from label to here.
1307  int SizeOfCodeGeneratedSince(Label* l) { return pc_offset() - l->pos(); }
1308
1309  // Mark address of the ExitJSFrame code.
1310  void RecordJSReturn();
1311
1312  // Mark address of a debug break slot.
1313  void RecordDebugBreakSlot();
1314
1315  // Record a comment relocation entry that can be used by a disassembler.
1316  // Use --code-comments to enable.
  // NOTE(review): force (default false) presumably records the comment even
  // when --code-comments is off -- confirm against the definition.
1317  void RecordComment(const char* msg, bool force = false);
1318
1319  // Writes a single data item into the code stream: db takes one byte
  // (uint8_t) and dd one 32-bit value (uint32_t).
1320  // Used for inline tables, e.g., jump-tables.
1321  void db(uint8_t data);
1322  void dd(uint32_t data);
1323
1324  int pc_offset() const { return static_cast<int>(pc_ - buffer_); }
1325
1326  PositionsRecorder* positions_recorder() { return &positions_recorder_; }
1327
1328  // Check if there is less than kGap bytes available in the buffer.
1329  // If this is the case, we need to grow the buffer before emitting
1330  // an instruction or relocation information.
1331  inline bool buffer_overflow() const {
1332    return pc_ >= reloc_info_writer.pos() - kGap;
1333  }
1334
1335  // Get the number of bytes available in the buffer.
1336  inline int available_space() const {
1337    return static_cast<int>(reloc_info_writer.pos() - pc_);
1338  }
1339
1340  static bool IsNop(Address addr) { return *addr == 0x90; }
1341
1342  // Avoid overflows for displacements etc.
  // Upper (512*MB) and lower (4*KB) bounds on the code buffer size.
1343  static const int kMaximalBufferSize = 512*MB;
1344  static const int kMinimalBufferSize = 4*KB;
1345
1346 private:
1347  byte* addr_at(int pos)  { return buffer_ + pos; }
1348  byte byte_at(int pos)  { return buffer_[pos]; }
1349  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
1350  uint32_t long_at(int pos)  {
1351    return *reinterpret_cast<uint32_t*>(addr_at(pos));
1352  }
1353  void long_at_put(int pos, uint32_t x)  {
1354    *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
1355  }
1356
1357  // code emission
  // GrowBuffer is what EnsureSpace calls when buffer_overflow() reports that
  // too little room is left.
1358  void GrowBuffer();
1359
1360  void emit(byte x) { *pc_++ = x; }
1361  inline void emitl(uint32_t x);  // Emits a 32-bit value.
  // Emits a 64-bit value; rmode is the relocation mode associated with it.
1362  inline void emitq(uint64_t x, RelocInfo::Mode rmode);
1363  inline void emitw(uint16_t x);  // Emits a 16-bit value.
  // NOTE(review): presumably records target in code_targets_ and emits a
  // reference to it -- confirm against the inline definition.
1364  inline void emit_code_target(Handle<Code> target, RelocInfo::Mode rmode);
1365  void emit(Immediate x) { emitl(x.value_); }
1366
1367  // Emits a REX prefix that encodes a 64-bit operand size and
1368  // the top bit of both register codes.
1369  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1370  // REX.W is set.
  // The XMM overloads apply the same rule to an XMM register in either
  // position.
1371  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
1372  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
1373  inline void emit_rex_64(Register reg, Register rm_reg);
1374
1375  // Emits a REX prefix that encodes a 64-bit operand size and
1376  // the top bit of the destination, index, and base register codes.
1377  // The high bit of reg is used for REX.R, the high bit of op's base
1378  // register is used for REX.B, and the high bit of op's index register
1379  // is used for REX.X.  REX.W is set.
1380  inline void emit_rex_64(Register reg, const Operand& op);
1381  inline void emit_rex_64(XMMRegister reg, const Operand& op);
1382
1383  // Emits a REX prefix that encodes a 64-bit operand size and
1384  // the top bit of the register code.
1385  // The high bit of register is used for REX.B.
1386  // REX.W is set and REX.R and REX.X are clear.
1387  inline void emit_rex_64(Register rm_reg);
1388
1389  // Emits a REX prefix that encodes a 64-bit operand size and
1390  // the top bit of the index and base register codes.
1391  // The high bit of op's base register is used for REX.B, and the high
1392  // bit of op's index register is used for REX.X.
1393  // REX.W is set and REX.R is clear.
1394  inline void emit_rex_64(const Operand& op);
1395
1396  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
1397  void emit_rex_64() { emit(0x48); }
1398
1399  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1400  // REX.W is clear.
1401  inline void emit_rex_32(Register reg, Register rm_reg);
1402
1403  // The high bit of reg is used for REX.R, the high bit of op's base
1404  // register is used for REX.B, and the high bit of op's index register
1405  // is used for REX.X.  REX.W is cleared.
1406  inline void emit_rex_32(Register reg, const Operand& op);
1407
1408  // High bit of rm_reg goes to REX.B.
1409  // REX.W, REX.R and REX.X are clear.
1410  inline void emit_rex_32(Register rm_reg);
1411
1412  // High bit of base goes to REX.B and high bit of index to REX.X.
1413  // REX.W and REX.R are clear.
1414  inline void emit_rex_32(const Operand& op);
1415
1416  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1417  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
1418  inline void emit_optional_rex_32(Register reg, Register rm_reg);
1419
1420  // The high bit of reg is used for REX.R, the high bit of op's base
1421  // register is used for REX.B, and the high bit of op's index register
1422  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
1423  // is emitted.
1424  inline void emit_optional_rex_32(Register reg, const Operand& op);
1425
1426  // As for emit_optional_rex_32(Register, Register), except that
1427  // the registers are XMM registers.
1428  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);
1429
1430  // As for emit_optional_rex_32(Register, Register), except that
1431  // the first register is an XMM register.
1432  inline void emit_optional_rex_32(XMMRegister reg, Register base);
1433
1434  // As for emit_optional_rex_32(Register, Register), except that
1435  // the second register is an XMM register.
1436  inline void emit_optional_rex_32(Register reg, XMMRegister base);
1437
1438  // As for emit_optional_rex_32(Register, const Operand&), except that
1439  // the register is an XMM register.
1440  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);
1441
1442  // Optionally do as emit_rex_32(Register) if the register number has
1443  // the high bit set.
1444  inline void emit_optional_rex_32(Register rm_reg);
1445
1446  // Optionally do as emit_rex_32(const Operand&) if the operand register
1447  // numbers have a high bit set.
1448  inline void emit_optional_rex_32(const Operand& op);
1449
1450
1451  // Emit the ModR/M byte, and optionally the SIB byte and
1452  // 1- or 4-byte offset for a memory operand.  Also encodes
1453  // the second operand of the operation, a register or operation
1454  // subcode, into the reg field of the ModR/M byte.
1455  void emit_operand(Register reg, const Operand& adr) {
1456    emit_operand(reg.low_bits(), adr);
1457  }
1458
1459  // Emit the ModR/M byte, and optionally the SIB byte and
1460  // 1- or 4-byte offset for a memory operand.  Also used to encode
1461  // a three-bit opcode extension into the ModR/M byte.
  // Despite its name, the rm parameter carries the value for the reg field:
  // the Register overload of emit_operand forwards reg.low_bits() here.
1462  void emit_operand(int rm, const Operand& adr);
1463
1464  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
1465  void emit_modrm(Register reg, Register rm_reg) {
1466    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
1467  }
1468
1469  // Emit a ModR/M byte with an operation subcode in the reg field and
1470  // a register in the rm_reg field.
1471  void emit_modrm(int code, Register rm_reg) {
1472    ASSERT(is_uint3(code));
1473    emit(0xC0 | code << 3 | rm_reg.low_bits());
1474  }
1475
1476  // Emit the code-object-relative offset of the label's position.
1477  inline void emit_code_relative_offset(Label* label);
1478
1479  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
1480  // AND, OR, XOR, or CMP.  The encodings of these operations are all
1481  // similar, differing just in the opcode or in the reg field of the
1482  // ModR/M byte.
  // The register forms take the opcode directly; the immediate forms take a
  // subcode instead.
1483  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
1484  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
1485  void arithmetic_op_32(byte opcode, Register reg, Register rm_reg);
1486  void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg);
1487  void arithmetic_op(byte opcode, Register reg, Register rm_reg);
1488  void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg);
1489  void immediate_arithmetic_op(byte subcode, Register dst, Immediate src);
1490  void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src);
1491  // Operate on a byte in memory or register.
1492  void immediate_arithmetic_op_8(byte subcode,
1493                                 Register dst,
1494                                 Immediate src);
1495  void immediate_arithmetic_op_8(byte subcode,
1496                                 const Operand& dst,
1497                                 Immediate src);
1498  // Operate on a word in memory or register.
1499  void immediate_arithmetic_op_16(byte subcode,
1500                                  Register dst,
1501                                  Immediate src);
1502  void immediate_arithmetic_op_16(byte subcode,
1503                                  const Operand& dst,
1504                                  Immediate src);
1505  // Operate on a 32-bit word in memory or register.
1506  void immediate_arithmetic_op_32(byte subcode,
1507                                  Register dst,
1508                                  Immediate src);
1509  void immediate_arithmetic_op_32(byte subcode,
1510                                  const Operand& dst,
1511                                  Immediate src);
1512
1513  // Emit machine code for a shift operation.
  // subcode selects the operation; the inline wrappers in this class pass
  // 0x0-0x3 for rol/ror/rcl/rcr, 0x4 for shl, 0x5 for shr and 0x7 for sar.
1514  void shift(Register dst, Immediate shift_amount, int subcode);
1515  void shift_32(Register dst, Immediate shift_amount, int subcode);
1516  // Shift dst by the count in cl: cl % 64 bits for shift, cl % 32 bits for
  // shift_32 (the hardware masks the count to the operand width).
1517  void shift(Register dst, int subcode);
1518  void shift_32(Register dst, int subcode);
1519
  // NOTE(review): presumably emits a two-byte x87 instruction built from b1
  // and b2 + i -- confirm against the definition.
1520  void emit_farith(int b1, int b2, int i);
1521
1522  // labels
1523  // void print(Label* L);
1524  void bind_to(Label* L, int pos);
1525
1526  // record reloc info for current pc_
  // data defaults to 0.
1527  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
1528
1529  friend class CodePatcher;
  // EnsureSpace needs access to the private GrowBuffer().
1530  friend class EnsureSpace;
1531  friend class RegExpMacroAssemblerX64;
1532
1533  // Code buffer:
1534  // The buffer into which code and relocation info are generated.
1535  byte* buffer_;
1536  int buffer_size_;
1537  // True if the assembler owns the buffer, false if buffer is external.
1538  bool own_buffer_;
1539  // A previously allocated buffer of kMinimalBufferSize bytes, or NULL.
1540  static byte* spare_buffer_;
1541
1542  // code generation
1543  byte* pc_;  // the program counter; moves forward
  // Its pos() marks the far end of the free space measured by
  // buffer_overflow() and available_space().
1544  RelocInfoWriter reloc_info_writer;
1545
  // NOTE(review): presumably the code objects referenced by emitted code
  // targets (see emit_code_target) -- confirm.
1546  List< Handle<Code> > code_targets_;
1547  // push-pop elimination
1548  byte* last_pc_;
1549
1550  PositionsRecorder positions_recorder_;
1551  friend class PositionsRecorder;
1552};
1553
1554
1555// Helper class that ensures that there is enough space for generating
1556// instructions and relocation information.  The constructor makes
1557// sure that there is enough space and (in debug mode) the destructor
1558// checks that we did not generate too much.
1559class EnsureSpace BASE_EMBEDDED {
1560 public:
1561  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
1562    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
1563#ifdef DEBUG
1564    space_before_ = assembler_->available_space();
1565#endif
1566  }
1567
1568#ifdef DEBUG
1569  ~EnsureSpace() {
1570    int bytes_generated = space_before_ - assembler_->available_space();
1571    ASSERT(bytes_generated < assembler_->kGap);
1572  }
1573#endif
1574
1575 private:
1576  Assembler* assembler_;
1577#ifdef DEBUG
1578  int space_before_;
1579#endif
1580};
1581
1582} }  // namespace v8::internal
1583
1584#endif  // V8_X64_ASSEMBLER_X64_H_
1585