assembler-x64.h revision c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7a
1// Copyright (c) 1994-2006 Sun Microsystems Inc.
2// All Rights Reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// - Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10//
11// - Redistribution in binary form must reproduce the above copyright
12// notice, this list of conditions and the following disclaimer in the
13// documentation and/or other materials provided with the distribution.
14//
15// - Neither the name of Sun Microsystems or the names of contributors may
16// be used to endorse or promote products derived from this software without
17// specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// The original source code covered by the above license above has been
32// modified significantly by Google Inc.
33// Copyright 2012 the V8 project authors. All rights reserved.
34
35// A lightweight X64 Assembler.
36
37#ifndef V8_X64_ASSEMBLER_X64_H_
38#define V8_X64_ASSEMBLER_X64_H_
39
40#include <deque>
41
42#include "src/assembler.h"
43#include "src/x64/sse-instr.h"
44
45namespace v8 {
46namespace internal {
47
48// Utility functions
49
// X-macro listing all sixteen x64 general-purpose registers, in hardware
// encoding order (rax = 0 ... r15 = 15). Used below to generate the
// Register::Code enum and the register constants.
#define GENERAL_REGISTERS(V) \
  V(rax)                     \
  V(rcx)                     \
  V(rdx)                     \
  V(rbx)                     \
  V(rsp)                     \
  V(rbp)                     \
  V(rsi)                     \
  V(rdi)                     \
  V(r8)                      \
  V(r9)                      \
  V(r10)                     \
  V(r11)                     \
  V(r12)                     \
  V(r13)                     \
  V(r14)                     \
  V(r15)

// Subset of GENERAL_REGISTERS available to the register allocator.
// rsp and rbp are omitted (stack and frame pointers); r10 and r13 are also
// omitted — presumably reserved by the code generator (scratch/root
// registers) — TODO confirm against the rest of the port.
#define ALLOCATABLE_GENERAL_REGISTERS(V) \
  V(rax)                                 \
  V(rbx)                                 \
  V(rdx)                                 \
  V(rcx)                                 \
  V(rsi)                                 \
  V(rdi)                                 \
  V(r8)                                  \
  V(r9)                                  \
  V(r11)                                 \
  V(r12)                                 \
  V(r14)                                 \
  V(r15)
81
82
83// CPU Registers.
84//
85// 1) We would prefer to use an enum, but enum values are assignment-
86// compatible with int, which has caused code-generation bugs.
87//
88// 2) We would prefer to use a class instead of a struct but we don't like
89// the register initialization to depend on the particular initialization
90// order (which appears to be different on OS X, Linux, and Windows for the
91// installed versions of C++ we tried). Using a struct permits C-style
92// "initialization". Also, the Register objects cannot be const as this
93// forces initialization stubs in MSVC, making us dependent on initialization
94// order.
95//
96// 3) By not using an enum, we are possibly preventing the compiler from
97// doing certain constant folds, which may significantly reduce the
98// code generated for some assembly instructions (because they boil down
99// to a few constants). If this is a problem, we could change the code
100// such that we use an enum in optimized mode, and the struct in debug
101// mode. This way we get the compile-time error checking in debug mode
102// and best performance in optimized code.
103//
// A general-purpose register, represented by its hardware encoding.
// Kept as a plain struct (not an enum or class) for the reasons given in the
// comment above.
struct Register {
  enum Code {
#define REGISTER_CODE(R) kCode_##R,
    GENERAL_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
        kAfterLast,
    kCode_no_reg = -1
  };

  static const int kNumRegisters = Code::kAfterLast;

  // Builds a Register from its hardware code; the code must be in range.
  static Register from_code(int code) {
    DCHECK(code >= 0);
    DCHECK(code < kNumRegisters);
    Register r = {code};
    return r;
  }
  bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; }
  bool is(Register reg) const { return reg_code == reg.reg_code; }
  int code() const {
    DCHECK(is_valid());
    return reg_code;
  }
  // Single-bit mask for this register, e.g. for register-set bitmaps.
  int bit() const {
    DCHECK(is_valid());
    return 1 << reg_code;
  }

  // rax..rbx (codes 0-3) have low-byte forms addressable without a REX
  // prefix.
  bool is_byte_register() const { return reg_code <= 3; }
  // Return the high bit of the register code as a 0 or 1.  Used often
  // when constructing the REX prefix byte.
  int high_bit() const { return reg_code >> 3; }
  // Return the 3 low bits of the register code.  Used when encoding registers
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const { return reg_code & 0x7; }

  // Unfortunately we can't make this private in a struct when initializing
  // by assignment.
  int reg_code;
};
144
145
// Define a named constant for each general register, e.g. `const Register
// rax`.
#define DECLARE_REGISTER(R) const Register R = {Register::kCode_##R};
GENERAL_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
// Sentinel meaning "no register".
const Register no_reg = {Register::kCode_no_reg};


// Registers carrying the first four integer/pointer C function arguments,
// per the host ABI.
#ifdef _WIN64
  // Windows calling convention
const Register arg_reg_1 = {Register::kCode_rcx};
const Register arg_reg_2 = {Register::kCode_rdx};
const Register arg_reg_3 = {Register::kCode_r8};
const Register arg_reg_4 = {Register::kCode_r9};
#else
  // AMD64 calling convention
const Register arg_reg_1 = {Register::kCode_rdi};
const Register arg_reg_2 = {Register::kCode_rsi};
const Register arg_reg_3 = {Register::kCode_rdx};
const Register arg_reg_4 = {Register::kCode_rcx};
#endif  // _WIN64
165
166
// X-macro listing all sixteen x64 SSE registers, in hardware encoding order
// (xmm0 = 0 ... xmm15 = 15).
#define DOUBLE_REGISTERS(V) \
  V(xmm0)                   \
  V(xmm1)                   \
  V(xmm2)                   \
  V(xmm3)                   \
  V(xmm4)                   \
  V(xmm5)                   \
  V(xmm6)                   \
  V(xmm7)                   \
  V(xmm8)                   \
  V(xmm9)                   \
  V(xmm10)                  \
  V(xmm11)                  \
  V(xmm12)                  \
  V(xmm13)                  \
  V(xmm14)                  \
  V(xmm15)

// float and 128-bit SIMD values live in the same xmm register file.
#define FLOAT_REGISTERS DOUBLE_REGISTERS
#define SIMD128_REGISTERS DOUBLE_REGISTERS

// Allocatable subset of the xmm file. xmm15 is omitted — presumably reserved
// as a scratch register by the code generator — TODO confirm.
#define ALLOCATABLE_DOUBLE_REGISTERS(V) \
  V(xmm0)                               \
  V(xmm1)                               \
  V(xmm2)                               \
  V(xmm3)                               \
  V(xmm4)                               \
  V(xmm5)                               \
  V(xmm6)                               \
  V(xmm7)                               \
  V(xmm8)                               \
  V(xmm9)                               \
  V(xmm10)                              \
  V(xmm11)                              \
  V(xmm12)                              \
  V(xmm13)                              \
  V(xmm14)

// Float and double registers alias one-to-one (same file, same codes).
static const bool kSimpleFPAliasing = true;
206
// An SSE (xmm) register, represented by its hardware encoding. Mirrors the
// layout of struct Register above.
struct XMMRegister {
  enum Code {
#define REGISTER_CODE(R) kCode_##R,
    DOUBLE_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
        kAfterLast,
    kCode_no_reg = -1
  };

  static const int kMaxNumRegisters = Code::kAfterLast;

  // NOTE(review): unlike Register::from_code, the code is not range-checked
  // with DCHECKs here — confirm this asymmetry is intentional.
  static XMMRegister from_code(int code) {
    XMMRegister result = {code};
    return result;
  }

  bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; }
  bool is(XMMRegister reg) const { return reg_code == reg.reg_code; }
  int code() const {
    DCHECK(is_valid());
    return reg_code;
  }

  // Return the high bit of the register code as a 0 or 1.  Used often
  // when constructing the REX prefix byte.
  int high_bit() const { return reg_code >> 3; }
  // Return the 3 low bits of the register code.  Used when encoding registers
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const { return reg_code & 0x7; }

  // Unfortunately we can't make this private in a struct when initializing
  // by assignment.
  int reg_code;
};
241
// All three views are the same physical xmm register file.
typedef XMMRegister FloatRegister;

typedef XMMRegister DoubleRegister;

typedef XMMRegister Simd128Register;

// Define a named constant for each xmm register, e.g. `const DoubleRegister
// xmm0`.
#define DECLARE_REGISTER(R) \
  const DoubleRegister R = {DoubleRegister::kCode_##R};
DOUBLE_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
// Sentinel meaning "no register".
const DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg};
253
// Condition codes, numbered to match the x64 condition encoding used in
// Jcc/SETcc/CMOVcc opcodes.
enum Condition {
  // Any value < 0 means "no condition".
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // Pseudo conditions; the opcodes that accept them handle them specially.
  always = 16,
  never = 17,
  // Aliases.
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive,
  last_condition = greater
};


// Returns the equivalent of !cc. The condition encoding pairs each condition
// with its negation in adjacent codes, so flipping the low bit negates.
// Negating the default no_condition (-1) yields a non-default negative value
// (-2); as long as tests for no_condition check for condition < 0, this
// still behaves as expected.
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}


// Commute a condition such that {a cond b == b cond' a}. Ordering
// comparisons swap with their mirror image; all other conditions are
// symmetric and returned unchanged.
inline Condition CommuteCondition(Condition cc) {
  switch (cc) {
    case below:         return above;
    case above:         return below;
    case above_equal:   return below_equal;
    case below_equal:   return above_equal;
    case less:          return greater;
    case greater:       return less;
    case greater_equal: return less_equal;
    case less_equal:    return greater_equal;
    default:            return cc;
  }
}
322
323
// Rounding modes. NOTE(review): the values look like the immediate encoding
// of the SSE4.1 round instructions — confirm against the emitter before
// relying on that.
enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
330
331
332// -----------------------------------------------------------------------------
333// Machine instruction Immediates
334
// A 32-bit immediate operand, optionally carrying relocation information.
// Only the Assembler (a friend) reads the stored value/mode.
class Immediate BASE_EMBEDDED {
 public:
  explicit Immediate(int32_t value) : value_(value) {}
  explicit Immediate(int32_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  // Encodes a Smi by reinterpreting its raw tagged bits as a 32-bit value.
  explicit Immediate(Smi* value) {
    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
    value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value));
  }

 private:
  int32_t value_;
  RelocInfo::Mode rmode_ = RelocInfo::NONE32;

  friend class Assembler;
};
351
352
353// -----------------------------------------------------------------------------
354// Machine instruction Operands
355
// Scale factors for the SIB byte: the index register is multiplied by
// 2^scale, i.e. 1, 2, 4 or 8.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,
  times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
};
364
365
// A memory operand of the form [base + index*scale + disp]. The encoding
// (ModR/M, optional SIB, displacement) is computed eagerly at construction
// and stored in buf_; constructor bodies live in the .cc/.inl files.
class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // [rip + disp/r]
  explicit Operand(Label* label);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

 private:
  // REX prefix bits contributed by the base/index registers.
  byte rex_;
  // ModR/M byte, optional SIB byte, and up to a 64-bit displacement
  // (set_disp64, used for labels).
  byte buf_[9];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);
  inline void set_disp64(int64_t disp);  // for labels.

  friend class Assembler;
};
423
// Instructions that are emitted in p/l/q (pointer / 32-bit / 64-bit) width
// variants. Each entry is expanded by DECLARE_INSTRUCTION inside the
// Assembler class into templated wrappers forwarding to emit_<name>.
#define ASSEMBLER_INSTRUCTION_LIST(V) \
  V(add)                              \
  V(and)                              \
  V(cmp)                              \
  V(cmpxchg)                          \
  V(dec)                              \
  V(idiv)                             \
  V(div)                              \
  V(imul)                             \
  V(inc)                              \
  V(lea)                              \
  V(mov)                              \
  V(movzxb)                           \
  V(movzxw)                           \
  V(neg)                              \
  V(not)                              \
  V(or)                               \
  V(repmovs)                          \
  V(sbb)                              \
  V(sub)                              \
  V(test)                             \
  V(xchg)                             \
  V(xor)
447
// Shift instructions on operands/registers with kPointerSize, kInt32Size and
// kInt64Size. Each entry pairs the mnemonic with its /digit subcode (the reg
// field of the ModR/M byte selecting the operation within the shift group).
// Subcode 0x6 is intentionally absent: on x64 it is an undocumented alias of
// shl (0x4), which is also the encoding of sal.
// Fix: the original list ended with a stray trailing backslash, splicing the
// following (blank) line into the macro; any non-blank line placed directly
// after it would silently have become part of the macro.
#define SHIFT_INSTRUCTION_LIST(V) \
  V(rol, 0x0)                     \
  V(ror, 0x1)                     \
  V(rcl, 0x2)                     \
  V(rcr, 0x3)                     \
  V(shl, 0x4)                     \
  V(shr, 0x5)                     \
  V(sar, 0x7)
458
459
class Assembler : public AssemblerBase {
 private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  static const int kGap = 32;

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is NULL, the assembler allocates and grows its own
  // buffer, and buffer_size determines the initial buffer size. The buffer is
  // owned by the assembler and deallocated upon destruction of the assembler.
  //
  // If the provided buffer is not NULL, the assembler uses the provided buffer
  // for code generation and assumes its size to be buffer_size. If the buffer
  // is too small, a fatal error occurs. No deallocation of the buffer is done
  // upon destruction of the assembler.
  Assembler(Isolate* isolate, void* buffer, int buffer_size);
  virtual ~Assembler() { }

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc. GetCode() is idempotent; it returns the same result if no other
  // Assembler functions are invoked in between GetCode() calls.
  void GetCode(CodeDesc* desc);

  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
  static inline Address target_address_at(Address pc, Address constant_pool);
  static inline void set_target_address_at(
      Isolate* isolate, Address pc, Address constant_pool, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
  // Convenience overload: extracts the constant pool from |code| (which may
  // be NULL) and forwards to the overload above.
  static inline Address target_address_at(Address pc, Code* code) {
    Address constant_pool = code ? code->constant_pool() : NULL;
    return target_address_at(pc, constant_pool);
  }
  static inline void set_target_address_at(
      Isolate* isolate, Address pc, Code* code, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) {
    Address constant_pool = code ? code->constant_pool() : NULL;
    set_target_address_at(isolate, pc, constant_pool, target,
                          icache_flush_mode);
  }

  // Return the code target address at a call site from the return address
  // of that call in the instruction stream.
  static inline Address target_address_from_return_address(Address pc);

  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  inline static void deserialization_set_special_target_at(
      Isolate* isolate, Address instruction_payload, Code* code,
      Address target) {
    set_target_address_at(isolate, instruction_payload, code, target);
  }
529
  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
      Isolate* isolate, Address pc, Address target,
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);

  // The "no relocation" mode matching the pointer width of this build.
  static inline RelocInfo::Mode RelocInfoNone() {
    if (kPointerSize == kInt64Size) {
      return RelocInfo::NONE64;
    } else {
      DCHECK(kPointerSize == kInt32Size);
      return RelocInfo::NONE32;
    }
  }

  inline Handle<Object> code_target_object_handle_at(Address pc);
  inline Address runtime_entry_at(Address pc);
  // Number of bytes taken up by the branch target in the code.
  static const int kSpecialTargetSize = 4;  // Use 32-bit displacement.
  // Distance between the address of the code target in the call instruction
  // and the return address pushed on the stack.
  static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
  // The length of call(kScratchRegister).
  static const int kCallScratchRegisterInstructionLength = 3;
  // The length of call(Immediate32).
  static const int kShortCallInstructionLength = 5;
  // The length of movq(kScratchRegister, address).
  static const int kMoveAddressIntoScratchRegisterInstructionLength =
      2 + kPointerSize;
  // The length of movq(kScratchRegister, address) and call(kScratchRegister).
  static const int kCallSequenceLength =
      kMoveAddressIntoScratchRegisterInstructionLength +
      kCallScratchRegisterInstructionLength;

  // The debug break slot must be able to contain an indirect call sequence.
  static const int kDebugBreakSlotLength = kCallSequenceLength;
  // Distance between start of patched debug break slot and the emitted address
  // to jump to.
  static const int kPatchDebugBreakSlotAddressOffset =
      kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;

  // One byte opcode for test eax,0xXXXXXXXX.
  static const byte kTestEaxByte = 0xA9;
  // One byte opcode for test al, 0xXX.
  static const byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static const byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  static const byte kJccShortPrefix = 0x70;
  // Short Jcc opcodes are the prefix OR-ed with the condition code.
  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static const byte kJcShortOpcode = kJccShortPrefix | carry;
  static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static const byte kJzShortOpcode = kJccShortPrefix | zero;

  // VEX prefix encodings.
  // Field values used when assembling a VEX prefix: SIMD prefix (pp),
  // vector length (L), operand-size/width bit (W) and leading opcode map.
  enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
589
  // ---------------------------------------------------------------------------
  // Code generation
  //
  // Function names correspond one-to-one to x64 instruction mnemonics.
  // Unless specified otherwise, instructions operate on 64-bit operands.
  //
  // If we need versions of an assembly instruction that operate on different
  // width arguments, we add a single-letter suffix specifying the width.
  // This is done for the following instructions: mov, cmp, inc, dec,
  // add, sub, and test.
  // There are no versions of these instructions without the suffix.
  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
  // - Instructions on operands/registers with pointer size use 'p'.

  STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);

  // For each instruction in ASSEMBLER_INSTRUCTION_LIST, generate the p/l/q
  // width variants in 1-, 2- and 3-argument forms, each forwarding to the
  // corresponding emit_<name> helper with the operand size appended.
#define DECLARE_INSTRUCTION(instruction)                \
  template<class P1>                                    \
  void instruction##p(P1 p1) {                          \
    emit_##instruction(p1, kPointerSize);               \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##l(P1 p1) {                          \
    emit_##instruction(p1, kInt32Size);                 \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##q(P1 p1) {                          \
    emit_##instruction(p1, kInt64Size);                 \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##p(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kPointerSize);           \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##l(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt32Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##q(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt64Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##p(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kPointerSize);       \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##l(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt32Size);         \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##q(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt64Size);         \
  }
  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION
656
  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m, where m must be a power of 2.
  void Align(int m);
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a mulitple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
  // Emit the given number of padding bytes as nops.
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal for a jump target for the platform.
  void CodeTargetAlign();

  // Stack
  void pushfq();
  void popfq();

  void pushq(Immediate value);
  // Push a 32 bit integer, and guarantee that it is actually pushed as a
  // 32 bit value, the normal push will optimize the 8 bit case.
  void pushq_imm32(int32_t imm32);
  void pushq(Register src);
  void pushq(const Operand& src);

  void popq(Register dst);
  void popq(const Operand& dst);

  void enter(Immediate size);
  void leave();

  // Moves
  void movb(Register dst, const Operand& src);
  void movb(Register dst, Immediate imm);
  void movb(const Operand& dst, Register src);
  void movb(const Operand& dst, Immediate imm);

  // Move the low 16 bits of a 64-bit register value to a 16-bit
  // memory location.
  void movw(Register dst, const Operand& src);
  void movw(const Operand& dst, Register src);
  void movw(const Operand& dst, Immediate imm);

  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  void movl(const Operand& dst, Label* src);

  // Loads a pointer into a register with a relocation mode.
  void movp(Register dst, void* ptr, RelocInfo::Mode rmode);

  // Loads a 64-bit immediate into a register.
  void movq(Register dst, int64_t value,
            RelocInfo::Mode rmode = RelocInfo::NONE64);
  void movq(Register dst, uint64_t value,
            RelocInfo::Mode rmode = RelocInfo::NONE64);

  // Sign-extending moves, named movsx<from-width><to-width>.
  void movsxbl(Register dst, Register src);
  void movsxbl(Register dst, const Operand& src);
  void movsxbq(Register dst, Register src);
  void movsxbq(Register dst, const Operand& src);
  void movsxwl(Register dst, Register src);
  void movsxwl(Register dst, const Operand& src);
  void movsxwq(Register dst, Register src);
  void movsxwq(Register dst, const Operand& src);
  void movsxlq(Register dst, Register src);
  void movsxlq(Register dst, const Operand& src);

  // Repeated moves.

  void repmovsb();
  void repmovsw();
  void repmovsp() { emit_repmovs(kPointerSize); }
  void repmovsl() { emit_repmovs(kInt32Size); }
  void repmovsq() { emit_repmovs(kInt64Size); }

  // Instruction to load from an immediate 64-bit pointer into RAX.
  void load_rax(void* ptr, RelocInfo::Mode rmode);
  void load_rax(ExternalReference ext);

  // Conditional moves.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, const Operand& src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, const Operand& src);
738
  // 8-bit compares. 0x3A/0x38 are the `cmp r8, r/m8` and `cmp r/m8, r8`
  // opcode forms; 0x7 is the /7 ModR/M subcode selecting cmp within the
  // immediate arithmetic group.
  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  // Compare against the al register — presumably the short al, imm8
  // encoding; confirm in the implementation file.
  void cmpb_al(Immediate src);

  void cmpb(Register dst, Register src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  void cmpb(Register dst, const Operand& src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  void cmpb(const Operand& dst, Register src) {
    arithmetic_op_8(0x38, src, dst);
  }

  void cmpb(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  // 16-bit compares (0x3B/0x39 opcode forms, /7 immediate subcode).
  void cmpw(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, const Operand& src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(Register dst, Register src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(const Operand& dst, Register src) {
    arithmetic_op_16(0x39, src, dst);
  }

  // test is symmetric, so these forward to the (Operand, Register) forms.
  void testb(Register reg, const Operand& op) { testb(op, reg); }

  void testw(Register reg, const Operand& op) { testw(op, reg); }

  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);  // /4 subcode selects and.
  }

  void decb(Register dst);
  void decb(const Operand& dst);

  // Lock prefix.
  void lock();

  void xchgb(Register reg, const Operand& op);
  void xchgw(Register reg, const Operand& op);

  void cmpxchgb(const Operand& dst, Register src);
  void cmpxchgw(const Operand& dst, Register src);

  // Sign-extends rax into rdx:rax.
  void cqo();
  // Sign-extends eax into edx:eax.
  void cdq();

  // Multiply eax by src, put the result in edx:eax.
  void mull(Register src);
  void mull(const Operand& src);
  // Multiply rax by src, put the result in rdx:rax.
  void mulq(Register src);
811
  // Declares the full family of emitters for one shift/rotate instruction:
  // immediate-count and cl-count forms, for register and memory destinations,
  // each at pointer-size (p), 32-bit (l) and 64-bit (q) operand widths. The
  // subcode selects the operation within the shift-group opcode.
#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode)                       \
  void instruction##p(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kPointerSize);                                  \
  }                                                                           \
                                                                              \
  void instruction##l(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kInt32Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##q(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kInt64Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##p(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kPointerSize);                                  \
  }                                                                           \
                                                                              \
  void instruction##l(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kInt32Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##q(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kInt64Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \
                                                                              \
  void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); }   \
                                                                              \
  void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); }   \
                                                                              \
  void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); }  \
                                                                              \
  void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); }    \
                                                                              \
  void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
#undef DECLARE_SHIFT_INSTRUCTION
850
  // Shifts dst:src left by cl bits, affecting only dst.
  void shld(Register dst, Register src);

  // Shifts src:dst right by cl bits, affecting only dst.
  void shrd(Register dst, Register src);

  // Stores rax at the given absolute address / external reference.
  void store_rax(void* dst, RelocInfo::Mode mode);
  void store_rax(ExternalReference ref);

  // 8-bit subtract of an immediate from a register (opcode extension /5).
  void subb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x5, dst, src);
  }

  // 8-bit test (bitwise AND that only sets flags).
  void testb(Register dst, Register src);
  void testb(Register reg, Immediate mask);
  void testb(const Operand& op, Immediate mask);
  void testb(const Operand& op, Register reg);

  // 16-bit test (bitwise AND that only sets flags).
  void testw(Register dst, Register src);
  void testw(Register reg, Immediate mask);
  void testw(const Operand& op, Immediate mask);
  void testw(const Operand& op, Register reg);
873
  // Bit operations.
  void bt(const Operand& dst, Register src);
  void bts(const Operand& dst, Register src);
  // Bit scan reverse (most-significant set bit), 64-bit and 32-bit forms.
  void bsrq(Register dst, Register src);
  void bsrq(Register dst, const Operand& src);
  void bsrl(Register dst, Register src);
  void bsrl(Register dst, const Operand& src);
  // Bit scan forward (least-significant set bit), 64-bit and 32-bit forms.
  void bsfq(Register dst, Register src);
  void bsfq(Register dst, const Operand& src);
  void bsfl(Register dst, Register src);
  void bsfl(Register dst, const Operand& src);

  // Miscellaneous
  void clc();    // clear carry flag
  void cld();    // clear direction flag
  void cpuid();
  void hlt();
  void int3();   // software breakpoint
  void nop();
  void ret(int imm16);  // return, releasing imm16 extra bytes of stack
  void ud2();           // undefined instruction; traps deterministically
  void setcc(Condition cc, Register reg);
896
  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);
  void call(Handle<Code> target,
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
            TypeFeedbackId ast_id = TypeFeedbackId::None());

  // Calls directly to the given address using a relative offset.
  // Should only ever be used in Code objects for calls within the
  // same Code object. Should not be used when generating new code (use labels),
  // but only when patching existing code.
  void call(Address target);

  // Call near absolute indirect, address in register
  void call(Register adr);

  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Address entry, RelocInfo::Mode rmode);
  void jmp(Handle<Code> target, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
  // Jump near absolute indirect, target address read from memory.
  void jmp(const Operand& src);

  // Conditional jumps
  void j(Condition cc,
         Label* L,
         Label::Distance distance = Label::kFar);
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
949
  // Floating-point operations (x87 FPU stack machine)
  // Push st(i) onto the FPU stack.
  void fld(int i);

  // Push common constants: 1.0, 0.0, pi, ln(2).
  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  // Push a single-/double-precision value loaded from memory.
  void fld_s(const Operand& adr);
  void fld_d(const Operand& adr);

  // Store st(0) to memory and pop; fstp(i) stores to st(i) and pops.
  void fstp_s(const Operand& adr);
  void fstp_d(const Operand& adr);
  void fstp(int index);

  // Push a 32-/64-bit integer loaded from memory (converted to FP).
  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);

  // Store st(0) to memory as a 32-bit integer (no pop).
  void fist_s(const Operand& adr);

  // Store st(0) to memory as a 32-/64-bit integer and pop.
  void fistp_s(const Operand& adr);
  void fistp_d(const Operand& adr);

  // Store st(0) as integer with truncation and pop (SSE3 x87 extension).
  void fisttp_s(const Operand& adr);
  void fisttp_d(const Operand& adr);

  // Absolute value / sign change of st(0).
  void fabs();
  void fchs();

  // st(0) op= st(i), result left in st(0).
  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  // st(0) -= 32-bit integer loaded from memory.
  void fisub_s(const Operand& adr);

  // st(i) op= st(0), then pop the stack.
  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  // Exchange st(0) with st(i); stack top / free register management.
  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  // Comparisons of st(0); the fucomi variants set EFLAGS directly.
  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();  // store FPU status word in ax
  void fwait();
  void fnclex();     // clear FPU exception flags

  // Transcendental / scaling operations on the stack top.
  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  // Round st(0) to integer according to the current rounding mode.
  void frndint();

  // Load ah into the low byte of EFLAGS.
  void sahf();
1020
  // SSE instructions
  // Scalar single-precision arithmetic: dst op= src (low lane only).
  void addss(XMMRegister dst, XMMRegister src);
  void addss(XMMRegister dst, const Operand& src);
  void subss(XMMRegister dst, XMMRegister src);
  void subss(XMMRegister dst, const Operand& src);
  void mulss(XMMRegister dst, XMMRegister src);
  void mulss(XMMRegister dst, const Operand& src);
  void divss(XMMRegister dst, XMMRegister src);
  void divss(XMMRegister dst, const Operand& src);

  // Scalar single-precision min/max.
  void maxss(XMMRegister dst, XMMRegister src);
  void maxss(XMMRegister dst, const Operand& src);
  void minss(XMMRegister dst, XMMRegister src);
  void minss(XMMRegister dst, const Operand& src);

  void sqrtss(XMMRegister dst, XMMRegister src);
  void sqrtss(XMMRegister dst, const Operand& src);

  // Unordered scalar single-precision compare; sets EFLAGS.
  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, const Operand& src);
  void movaps(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, const Operand& src);
  void movss(const Operand& dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  // Truncating float -> int32 and int32 -> float conversions.
  void cvttss2si(Register dst, const Operand& src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, const Operand& src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  // Packed single-precision bitwise logic.
  void andps(XMMRegister dst, XMMRegister src);
  void andps(XMMRegister dst, const Operand& src);
  void orps(XMMRegister dst, XMMRegister src);
  void orps(XMMRegister dst, const Operand& src);
  void xorps(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, const Operand& src);

  // Packed single-precision arithmetic (all four lanes).
  void addps(XMMRegister dst, XMMRegister src);
  void addps(XMMRegister dst, const Operand& src);
  void subps(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, const Operand& src);
  void mulps(XMMRegister dst, XMMRegister src);
  void mulps(XMMRegister dst, const Operand& src);
  void divps(XMMRegister dst, XMMRegister src);
  void divps(XMMRegister dst, const Operand& src);

  // Extract the sign bits of the four float lanes into dst.
  void movmskps(Register dst, XMMRegister src);

  // Generic VEX-encoded instruction emitters used by the v* helpers below;
  // pp/m/w select the SIMD prefix, leading opcode bytes and VEX.W bit.
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);
1080
  // SSE2 instructions
  // Generic SSE2 emitters: prefix/escape/opcode are the literal encoding
  // bytes supplied by the macro invocations below.
  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
                  byte opcode);
  void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
                  byte opcode);
  // Declares register and memory forms of each SSE2 instruction in
  // SSE2_INSTRUCTION_LIST.
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }                                                                   \
  void instruction(XMMRegister dst, const Operand& src) {             \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

  // Declares the AVX (VEX-encoded, three-operand) counterpart v<instruction>
  // for each SSE2 instruction in the list.
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1,                     \
                      const Operand& src2) {                                 \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION

  // SSE3
  void lddqu(XMMRegister dst, const Operand& src);

  // SSSE3
  // Generic SSSE3 emitters; SSSE3 opcodes use two escape bytes.
  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
                   byte escape1, byte escape2, byte opcode);

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                          \
  void instruction(XMMRegister dst, const Operand& src) {                    \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

  // SSE4
  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
                  byte escape1, byte escape2, byte opcode);
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                         \
  void instruction(XMMRegister dst, const Operand& src) {                   \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION

  // Declares AVX counterparts for the SSSE3 and SSE4 lists; the two escape
  // bytes are pasted into a single LeadingOpcode constant (k##escape1##escape2).
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1,                      \
                      const Operand& src2) {                                  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION
1160
  // 32-bit moves between general-purpose registers/memory and xmm registers.
  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, const Operand& src);
  void movd(Register dst, XMMRegister src);
  // 64-bit moves between general-purpose and xmm registers.
  void movq(XMMRegister dst, Register src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(const Operand& dst, XMMRegister src);
  void movsd(XMMRegister dst, const Operand& src);

  // 128-bit integer moves: aligned (movdqa) and unaligned (movdqu).
  void movdqa(const Operand& dst, XMMRegister src);
  void movdqa(XMMRegister dst, const Operand& src);

  void movdqu(const Operand& dst, XMMRegister src);
  void movdqu(XMMRegister dst, const Operand& src);

  // Packed-double moves: aligned register copy and unaligned memory forms.
  void movapd(XMMRegister dst, XMMRegister src);
  void movupd(XMMRegister dst, const Operand& src);
  void movupd(const Operand& dst, XMMRegister src);

  // Packed integer shifts by an immediate count.
  void psllq(XMMRegister reg, byte imm8);
  void psrlq(XMMRegister reg, byte imm8);
  void psllw(XMMRegister reg, byte imm8);
  void pslld(XMMRegister reg, byte imm8);
  void psrlw(XMMRegister reg, byte imm8);
  void psrld(XMMRegister reg, byte imm8);
  void psraw(XMMRegister reg, byte imm8);
  void psrad(XMMRegister reg, byte imm8);

  // Truncating conversions to 32-bit (si) and 64-bit (siq) integers.
  void cvttsd2si(Register dst, const Operand& src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, const Operand& src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, const Operand& src);

  // Integer -> floating-point conversions; l = 32-bit source, q = 64-bit.
  void cvtlsi2sd(XMMRegister dst, const Operand& src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, const Operand& src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, const Operand& src);
  void cvtqsi2sd(XMMRegister dst, Register src);


  // Conversions between single and double precision.
  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
  void cvtsd2ss(XMMRegister dst, const Operand& src);

  // Rounding (non-truncating) double -> integer conversions.
  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  // Scalar double-precision arithmetic: dst op= src (low lane only).
  void addsd(XMMRegister dst, XMMRegister src);
  void addsd(XMMRegister dst, const Operand& src);
  void subsd(XMMRegister dst, XMMRegister src);
  void subsd(XMMRegister dst, const Operand& src);
  void mulsd(XMMRegister dst, XMMRegister src);
  void mulsd(XMMRegister dst, const Operand& src);
  void divsd(XMMRegister dst, XMMRegister src);
  void divsd(XMMRegister dst, const Operand& src);

  void maxsd(XMMRegister dst, XMMRegister src);
  void maxsd(XMMRegister dst, const Operand& src);
  void minsd(XMMRegister dst, XMMRegister src);
  void minsd(XMMRegister dst, const Operand& src);

  // Packed-double bitwise logic and scalar square root.
  void andpd(XMMRegister dst, XMMRegister src);
  void andpd(XMMRegister dst, const Operand& src);
  void orpd(XMMRegister dst, XMMRegister src);
  void orpd(XMMRegister dst, const Operand& src);
  void xorpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, const Operand& src);
  void sqrtsd(XMMRegister dst, XMMRegister src);
  void sqrtsd(XMMRegister dst, const Operand& src);

  // Unordered scalar double-precision compare; sets EFLAGS.
  void ucomisd(XMMRegister dst, XMMRegister src);
  void ucomisd(XMMRegister dst, const Operand& src);
  void cmpltsd(XMMRegister dst, XMMRegister src);

  // Extract the sign bits of the two double lanes into dst.
  void movmskpd(Register dst, XMMRegister src);

  // Interleave low/high 32-bit elements of the two sources.
  void punpckldq(XMMRegister dst, XMMRegister src);
  void punpckldq(XMMRegister dst, const Operand& src);
  void punpckhdq(XMMRegister dst, XMMRegister src);

  // SSE 4.1 instruction
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);
  // Lane extract/insert; imm8 selects the lane.
  void pextrb(Register dst, XMMRegister src, int8_t imm8);
  void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
  void pextrw(Register dst, XMMRegister src, int8_t imm8);
  void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
  void pextrd(Register dst, XMMRegister src, int8_t imm8);
  void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
  void pinsrb(XMMRegister dst, Register src, int8_t imm8);
  void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
  void pinsrw(XMMRegister dst, Register src, int8_t imm8);
  void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
  void pinsrd(XMMRegister dst, Register src, int8_t imm8);
  void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // Packed compares; cmp is the immediate predicate byte.
  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpps(XMMRegister dst, const Operand& src, int8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmppd(XMMRegister dst, const Operand& src, int8_t cmp);

  // Declares named packed-compare emitters (cmpeqps, cmpltpd, ...) that
  // forward to cmpps/cmppd with the matching predicate immediate.
#define SSE_CMP_P(instr, imm8)                                                \
  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
  void instr##ps(XMMRegister dst, const Operand& src) {                       \
    cmpps(dst, src, imm8);                                                    \
  }                                                                           \
  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, const Operand& src) { cmppd(dst, src, imm8); }

  SSE_CMP_P(cmpeq, 0x0);
  SSE_CMP_P(cmplt, 0x1);
  SSE_CMP_P(cmple, 0x2);
  SSE_CMP_P(cmpneq, 0x4);
  SSE_CMP_P(cmpnlt, 0x5);
  SSE_CMP_P(cmpnle, 0x6);

#undef SSE_CMP_P

  // Packed single-precision min/max, reciprocal and square-root estimates.
  void minps(XMMRegister dst, XMMRegister src);
  void minps(XMMRegister dst, const Operand& src);
  void maxps(XMMRegister dst, XMMRegister src);
  void maxps(XMMRegister dst, const Operand& src);
  void rcpps(XMMRegister dst, XMMRegister src);
  void rcpps(XMMRegister dst, const Operand& src);
  void rsqrtps(XMMRegister dst, XMMRegister src);
  void rsqrtps(XMMRegister dst, const Operand& src);
  void sqrtps(XMMRegister dst, XMMRegister src);
  void sqrtps(XMMRegister dst, const Operand& src);
  // Unaligned packed-single moves; byte shift; lane shuffle by immediate.
  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, const Operand& src);
  void movups(const Operand& dst, XMMRegister src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
  // Packed int32 -> float conversion.
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void cvtdq2ps(XMMRegister dst, const Operand& src);
1313
  // AVX instruction
  // VEX-encoded fused multiply-add/subtract on scalar doubles. Each wrapper
  // forwards its opcode byte to vfmasd; the 132/213/231 infix selects which
  // operands are multiplied and which is added (see the Intel SDM FMA pages).
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  // Negated-product variants: -(a*b) + c and -(a*b) - c.
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  // Shared emitters for the scalar-double FMA family above.
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1389
  // VEX-encoded fused multiply-add/subtract on scalar singles; mirrors the
  // scalar-double family above but dispatches through vfmass.
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  // Negated-product variants.
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  // Shared emitters for the scalar-single FMA family above.
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1464
  // VEX-encoded 32-bit (vmovd) and 64-bit (vmovq) moves between
  // general-purpose registers/memory and xmm registers.
  void vmovd(XMMRegister dst, Register src);
  void vmovd(XMMRegister dst, const Operand& src);
  void vmovd(Register dst, XMMRegister src);
  void vmovq(XMMRegister dst, Register src);
  void vmovq(XMMRegister dst, const Operand& src);
  void vmovq(Register dst, XMMRegister src);

  // VEX-encoded scalar-double moves. The memory forms pass xmm0 as the
  // unused vvvv operand; 0x10 is the load opcode, 0x11 the store opcode.
  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x10, dst, src1, src2);
  }
  void vmovsd(XMMRegister dst, const Operand& src) {
    vsd(0x10, dst, xmm0, src);
  }
  void vmovsd(const Operand& dst, XMMRegister src) {
    vsd(0x11, src, xmm0, dst);
  }
1481
  // Helper macros for three-operand AVX arithmetic: AVX_3 declares register
  // and memory forms of one instruction via the given emitter (vss/vsd/vps/
  // vpd); AVX_S_3 covers both scalar widths, AVX_P_3 both packed widths, and
  // AVX_SP_3 all four.
#define AVX_SP_3(instr, opcode) \
  AVX_S_3(instr, opcode)        \
  AVX_P_3(instr, opcode)

#define AVX_S_3(instr, opcode)  \
  AVX_3(instr##ss, opcode, vss) \
  AVX_3(instr##sd, opcode, vsd)

#define AVX_P_3(instr, opcode)  \
  AVX_3(instr##ps, opcode, vps) \
  AVX_3(instr##pd, opcode, vpd)

#define AVX_3(instr, opcode, impl)                                     \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    impl(opcode, dst, src1, src2);                                     \
  }                                                                    \
  void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    impl(opcode, dst, src1, src2);                                     \
  }

  AVX_SP_3(vsqrt, 0x51);
  AVX_SP_3(vadd, 0x58);
  AVX_SP_3(vsub, 0x5c);
  AVX_SP_3(vmul, 0x59);
  AVX_SP_3(vdiv, 0x5e);
  AVX_SP_3(vmin, 0x5d);
  AVX_SP_3(vmax, 0x5f);
  AVX_P_3(vand, 0x54);
  AVX_P_3(vor, 0x56);
  AVX_P_3(vxor, 0x57);
  AVX_3(vcvtsd2ss, 0x5a, vsd);

#undef AVX_3
#undef AVX_S_3
#undef AVX_P_3
#undef AVX_SP_3
1518
  // Packed shift of 64-bit lanes by an immediate.  Opcode 0x73 carries the
  // operation as a subcode in the ModR/M reg field (/2 = logical right,
  // /6 = left), passed here as a pseudo XMM register in the reg slot.
  void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {2};  // /2 subcode: shift right logical.
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {6};  // /6 subcode: shift left logical.
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  // Convert scalar single to scalar double (F3-prefixed opcode 0x5a).
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  // Integer -> floating-point conversions (opcode 0x2a).  The "l" variants
  // take a 32-bit integer source (VEX.W0), the "q" variants a 64-bit source
  // (VEX.W1); kF2 selects a double destination, kF3 a single destination.
  // A GPR source is wrapped in an XMMRegister so that its register code can
  // travel through vinstr's ModR/M encoding unchanged.
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  // Truncating float -> integer conversions (opcode 0x2c).  kF3 converts
  // from single, kF2 from double; kW0 produces a 32-bit result, kW1 (the
  // "...q" variants) a 64-bit result.  The GPR destination is wrapped in an
  // XMMRegister for vinstr's encoding; xmm0 fills the unused vvvv slot.
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  // Non-truncating double -> int32 conversion (opcode 0x2d), which rounds
  // according to the current rounding mode.
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  // Unordered scalar-double compare (opcode 0x2e); result goes to EFLAGS,
  // so no vvvv operand is needed (xmm0 fills the slot).
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, const Operand& src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  // Scalar rounding with an explicit rounding mode (0F3A-map opcodes
  // 0x0a/0x0b take the mode as an immediate byte).
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
1615
  // Emit an F2-prefixed (scalar-double) 0F-map instruction |op|.
  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }

  // Scalar-single moves: opcode 0x10 loads, 0x11 stores.  Note the store
  // form passes the XMM source in the reg slot and the memory destination
  // in the r/m slot.
  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, const Operand& src) {
    vss(0x10, dst, xmm0, src);
  }
  void vmovss(const Operand& dst, XMMRegister src) {
    vss(0x11, src, xmm0, dst);
  }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, const Operand& src);
  // Emit an F3-prefixed (scalar-single) 0F-map instruction |op|.
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1636
  // Packed 128-bit moves: 0x28 = aligned move, 0x10/0x11 = unaligned
  // load/store (store form swaps the operand slots).
  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
  void vmovups(XMMRegister dst, const Operand& src) {
    vps(0x10, dst, xmm0, src);
  }
  void vmovups(const Operand& dst, XMMRegister src) {
    vps(0x11, src, xmm0, dst);
  }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, const Operand& src) {
    vpd(0x10, dst, xmm0, src);
  }
  void vmovupd(const Operand& dst, XMMRegister src) {
    vpd(0x11, src, xmm0, dst);
  }
  // Extract the sign bits of each lane into a GPR (opcode 0x50); the GPR
  // destination is wrapped as an XMMRegister for encoding purposes.
  void vmovmskps(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vpd(0x50, idst, xmm0, src);
  }
  // Packed compares (opcode 0xC2); the compare predicate is an immediate
  // byte following the instruction.
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
1678
// Defines the four ps/pd x reg/mem overloads of one named packed-compare
// instruction in terms of vcmpps/vcmppd with a fixed predicate |imm8|.
#define AVX_CMP_P(instr, imm8)                                             \
  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmppd(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  AVX_CMP_P(vcmpeq, 0x0);
  AVX_CMP_P(vcmplt, 0x1);
  AVX_CMP_P(vcmple, 0x2);
  AVX_CMP_P(vcmpneq, 0x4);
  AVX_CMP_P(vcmpnlt, 0x5);
  AVX_CMP_P(vcmpnle, 0x6);

#undef AVX_CMP_P

  // Load an unaligned 128-bit value (opcode 0xF0).
  void vlddqu(XMMRegister dst, const Operand& src) {
    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
  }
1705  void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
1706    XMMRegister iop = {6};
1707    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
1708    emit(imm8);
1709  }
1710  void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
1711    XMMRegister iop = {2};
1712    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
1713    emit(imm8);
1714  }
1715  void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
1716    XMMRegister iop = {4};
1717    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
1718    emit(imm8);
1719  }
1720  void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
1721    XMMRegister iop = {6};
1722    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
1723    emit(imm8);
1724  }
1725  void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
1726    XMMRegister iop = {2};
1727    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
1728    emit(imm8);
1729  }
1730  void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
1731    XMMRegister iop = {4};
1732    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
1733    emit(imm8);
1734  }
  // vpextr*: extract a lane into a GPR/memory; vpinsr*: insert a lane from
  // a GPR/memory.  For the 0F3A-encoded extract forms the destination is
  // the ModR/M r/m operand, so |src| travels in the reg slot and the GPR
  // destination (wrapped as an XMMRegister) in the r/m slot.
  void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  // The register-destination form of vpextrw uses the older 0F-map encoding
  // (opcode 0xc5), where the GPR destination sits in the reg slot instead.
  void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
    emit(imm8);
  }
  void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = {dst.code()};
    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = {src2.code()};
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  // Shuffle the four 32-bit lanes of |src| according to |imm8|.
  void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }

  // Emit a packed-single (vps: no SIMD prefix) or packed-double (vpd: 0x66
  // prefix) 0F-map instruction |op|.
  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1801
  // BMI1 instructions.  The "q" variants operate on 64-bit registers, the
  // "l" variants on 32-bit registers.
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, const Operand& src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, const Operand& src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  // BEXTR's control operand (src2) goes in the VEX.vvvv slot, hence the
  // swapped argument order relative to andn above.
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, const Operand& src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, const Operand& src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
1827  void blsiq(Register dst, Register src) {
1828    Register ireg = {3};
1829    bmi1q(0xf3, ireg, dst, src);
1830  }
1831  void blsiq(Register dst, const Operand& src) {
1832    Register ireg = {3};
1833    bmi1q(0xf3, ireg, dst, src);
1834  }
1835  void blsil(Register dst, Register src) {
1836    Register ireg = {3};
1837    bmi1l(0xf3, ireg, dst, src);
1838  }
1839  void blsil(Register dst, const Operand& src) {
1840    Register ireg = {3};
1841    bmi1l(0xf3, ireg, dst, src);
1842  }
1843  void blsmskq(Register dst, Register src) {
1844    Register ireg = {2};
1845    bmi1q(0xf3, ireg, dst, src);
1846  }
1847  void blsmskq(Register dst, const Operand& src) {
1848    Register ireg = {2};
1849    bmi1q(0xf3, ireg, dst, src);
1850  }
1851  void blsmskl(Register dst, Register src) {
1852    Register ireg = {2};
1853    bmi1l(0xf3, ireg, dst, src);
1854  }
1855  void blsmskl(Register dst, const Operand& src) {
1856    Register ireg = {2};
1857    bmi1l(0xf3, ireg, dst, src);
1858  }
1859  void blsrq(Register dst, Register src) {
1860    Register ireg = {1};
1861    bmi1q(0xf3, ireg, dst, src);
1862  }
1863  void blsrq(Register dst, const Operand& src) {
1864    Register ireg = {1};
1865    bmi1q(0xf3, ireg, dst, src);
1866  }
1867  void blsrl(Register dst, Register src) {
1868    Register ireg = {1};
1869    bmi1l(0xf3, ireg, dst, src);
1870  }
1871  void blsrl(Register dst, const Operand& src) {
1872    Register ireg = {1};
1873    bmi1l(0xf3, ireg, dst, src);
1874  }
  // Count trailing zero bits.
  void tzcntq(Register dst, Register src);
  void tzcntq(Register dst, const Operand& src);
  void tzcntl(Register dst, Register src);
  void tzcntl(Register dst, const Operand& src);

  // Count leading zero bits.
  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, const Operand& src);
  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, const Operand& src);

  // Count set bits.
  void popcntq(Register dst, Register src);
  void popcntq(Register dst, const Operand& src);
  void popcntl(Register dst, Register src);
  void popcntl(Register dst, const Operand& src);
1889
  // BMI2 instructions.  For bzhi/sarx/shlx/shrx the index or shift-count
  // operand (src2) is encoded in the VEX.vvvv slot, hence the swapped
  // argument order when calling bmi2q/bmi2l.
  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, const Operand& src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  // Flagless unsigned multiply: dst1 receives the high half, dst2 the low
  // half of the product with the implicit rdx/edx operand.
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, const Operand& src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, const Operand& src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  // Parallel bit deposit/extract; pdep and pext share opcode 0xf5 and are
  // distinguished by the SIMD prefix (kF2 vs kF3).
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  // Flagless shifts; sarx/shlx/shrx share opcode 0xf7 and differ only in
  // the SIMD prefix.
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  // Flagless rotate right by immediate.
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, const Operand& src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, const Operand& src, byte imm8);
1978
1979  // Check the code size generated from label to here.
1980  int SizeOfCodeGeneratedSince(Label* label) {
1981    return pc_offset() - label->pos();
1982  }
1983
  // Mark generator continuation.
  void RecordGeneratorContinuation();

  // Mark address of a debug break slot.
  void RecordDebugBreakSlot(RelocInfo::Mode mode);

  // Record a comment relocation entry that can be used by a disassembler.
  // Use --code-comments to enable.
  void RecordComment(const char* msg);

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
                         int id);

  void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
                                          ConstantPoolEntry::Access access,
                                          ConstantPoolEntry::Type type) {
    // No embedded constant pool support.
    UNREACHABLE();
  }

  // Writes a single word of data in the code stream.
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
  void dq(uint64_t data);
  void dp(uintptr_t data) { dq(data); }
  // Writes the address that |label| is (or will be) bound to.
  void dq(Label* label);
2013
  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.  Code grows forward
  // from buffer_ while relocation info grows backward from the buffer end,
  // so the free space is the span between pc_ and the reloc writer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512*MB;

  // Raw byte access into the code buffer, used when patching already
  // emitted code.
  byte byte_at(int pos)  { return buffer_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }

  // Current emission position in the code buffer.
  Address pc() const { return pc_; }

 protected:
  // Call near indirect
  void call(const Operand& operand);
 private:
  byte* addr_at(int pos)  { return buffer_ + pos; }
  // Read/write a 32-bit value at buffer offset |pos|; used for patching
  // previously emitted code.
  uint32_t long_at(int pos)  {
    return *reinterpret_cast<uint32_t*>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x)  {
    *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
  }

  // code emission
  void GrowBuffer();

  // Append one byte at pc_; callers are responsible for having ensured
  // sufficient buffer space.
  void emit(byte x) { *pc_++ = x; }
  inline void emitl(uint32_t x);
  inline void emitp(void* x, RelocInfo::Mode rmode);
  inline void emitq(uint64_t x);
  inline void emitw(uint16_t x);
  inline void emit_code_target(Handle<Code> target,
                               RelocInfo::Mode rmode,
                               TypeFeedbackId ast_id = TypeFeedbackId::None());
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
  // Emit a 32-bit immediate, first recording relocation info when the
  // immediate carries a reloc mode.
  void emit(Immediate x) {
    if (!RelocInfo::IsNone(x.rmode_)) {
      RecordRelocInfo(x.rmode_);
    }
    emitl(x.value_);
  }
2067
  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is set.
  inline void emit_rex_64(Register reg, const Operand& op);
  inline void emit_rex_64(XMMRegister reg, const Operand& op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(const Operand& op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.
  inline void emit_rex_32(Register reg, const Operand& op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(const Operand& op);

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, const Operand& op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM registers.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, const Operand&), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(const Operand&) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(const Operand& op);

  // Emit REX.W when |size| is kInt64Size; 32-bit operations need no prefix
  // here.
  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK(size == kInt32Size);
    }
  }
2159
  // Size-dispatching REX helpers: emit a mandatory REX.W prefix for 64-bit
  // operations and an optional REX prefix for 32-bit ones.
  template<class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1);
    }
  }

  template<class P1, class P2>
  void emit_rex(P1 p1, P2 p2, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1, p2);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1, p2);
    }
  }
2179
  // Emit vex prefix
  // 0xc5 introduces the two-byte VEX form, 0xc4 the three-byte form.
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
  inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm,
                              LeadingOpcode m);
  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Register rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, const Operand& adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, const Operand& adr);

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // The first argument is the reg field, the second argument is the r/m field.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, const Operand& adr);
  void emit_sse_operand(Register reg, const Operand& adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst);
2238
  // Emit machine code for one of the operations ADD, ADC, SUB, SBC,
  // AND, OR, XOR, or CMP.  The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
  void arithmetic_op(byte opcode,
                     Register reg,
                     const Operand& rm_reg,
                     int size);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode,
                                 Register dst,
                                 Immediate src);
  void immediate_arithmetic_op_8(byte subcode,
                                 const Operand& dst,
                                 Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_16(byte subcode,
                                  const Operand& dst,
                                  Immediate src);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void immediate_arithmetic_op(byte subcode,
                               Register dst,
                               Immediate src,
                               int size);
  void immediate_arithmetic_op(byte subcode,
                               const Operand& dst,
                               Immediate src,
                               int size);

  // Emit machine code for a shift operation.  |subcode| selects the shift
  // kind via the ModR/M reg field.
  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
  void shift(Register dst, Immediate shift_amount, int subcode, int size);
  // Shift dst by cl % 64 bits.
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);

  // Emit an x87 instruction from its two opcode bytes and register index.
  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  // Bind label L to buffer offset |pos| and resolve its pending uses.
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
2292
  // Arithmetics
  // Each operation comes in reg,reg / reg,imm / reg,mem / mem,reg / mem,imm
  // forms.  The memory-destination forms use the "r/m, reg" direction of
  // the opcode (e.g. 0x01 vs 0x03 for add), hence the swapped src/dst
  // argument order when calling arithmetic_op.  Immediate forms pass the
  // operation's subcode for the ModR/M reg field.
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, const Operand& src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(const Operand& dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, const Operand& src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(const Operand& dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, const Operand& src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }
2353
  // Compare {al,ax,eax,rax} with src.  If equal, set ZF and write dst into
  // src. Otherwise clear ZF and write src into {al,ax,eax,rax}.  This
  // operation is only atomic if prefixed by the lock instruction.
  void emit_cmpxchg(const Operand& dst, Register src, int size);

  // Decrement the operand in place by one.
  void emit_dec(Register dst, int size);
  void emit_dec(const Operand& dst, int size);

  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  // Unsigned division; same operand and result registers as emit_idiv.
  void emit_div(Register src, int size);

  // Signed multiply instructions.
  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
  void emit_imul(Register src, int size);
  void emit_imul(const Operand& src, int size);
  // Two-operand (dst *= src) and three-operand (dst = src * imm) forms.
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, const Operand& src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, const Operand& src, Immediate imm, int size);

  // Increment the operand in place by one.
  void emit_inc(Register dst, int size);
  void emit_inc(const Operand& dst, int size);

  // Load effective address: dst = address computed from src, without reading
  // memory.
  void emit_lea(Register dst, const Operand& src, int size);

  // Moves between registers, memory and immediates at the given operand size.
  void emit_mov(Register dst, const Operand& src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(const Operand& dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(const Operand& dst, Immediate value, int size);

  // Zero-extending loads of a byte (movzxb) or 16-bit word (movzxw) into dst.
  void emit_movzxb(Register dst, const Operand& src, int size);
  void emit_movzxb(Register dst, Register src, int size);
  void emit_movzxw(Register dst, const Operand& src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  // Two's-complement negation in place.
  void emit_neg(Register dst, int size);
  void emit_neg(const Operand& dst, int size);

  // Bitwise complement in place.
  void emit_not(Register dst, int size);
  void emit_not(const Operand& dst, int size);
2398
2399  void emit_or(Register dst, Register src, int size) {
2400    arithmetic_op(0x0B, dst, src, size);
2401  }
2402
2403  void emit_or(Register dst, const Operand& src, int size) {
2404    arithmetic_op(0x0B, dst, src, size);
2405  }
2406
2407  void emit_or(const Operand& dst, Register src, int size) {
2408    arithmetic_op(0x9, src, dst, size);
2409  }
2410
2411  void emit_or(Register dst, Immediate src, int size) {
2412    immediate_arithmetic_op(0x1, dst, src, size);
2413  }
2414
2415  void emit_or(const Operand& dst, Immediate src, int size) {
2416    immediate_arithmetic_op(0x1, dst, src, size);
2417  }
2418
  // Emits a REP MOVS string-copy instruction; operand width is selected by
  // |size|.  NOTE(review): presumably the standard rcx/rsi/rdi string-move
  // convention applies -- confirm in the definition.
  void emit_repmovs(int size);
2420
2421  void emit_sbb(Register dst, Register src, int size) {
2422    arithmetic_op(0x1b, dst, src, size);
2423  }
2424
  // SUB family.  Opcode 0x2B is the register-destination form (SUB r, r/m),
  // 0x29 the memory-destination form (SUB r/m, r); /5 is the opcode extension
  // for the immediate group.
  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, const Operand& src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(const Operand& dst, Register src, int size) {
    // Memory-destination form: operands are passed to arithmetic_op swapped.
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }
2444
  // TEST: bitwise-AND the operands and set flags; no result is written.
  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(const Operand& op, Register reg, int size);
  void emit_test(const Operand& op, Immediate mask, int size);
  void emit_test(Register reg, const Operand& op, int size) {
    // TEST is symmetric in its operands, so reuse the (Operand, Register)
    // overload.
    return emit_test(op, reg, size);
  }
2452
  // Exchange the contents of the two operands.
  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, const Operand& src, int size);
2455
  // XOR family.  Opcode 0x33 is the register-destination form (XOR r, r/m),
  // 0x31 the memory-destination form (XOR r/m, r); /6 is the opcode extension
  // for the immediate group.
  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // Zeroing idiom (xor reg, reg): 32 bit operations zero the top 32 bits
      // of 64 bit registers.  Therefore there is no need to make this a
      // 64 bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }

  void emit_xor(Register dst, const Operand& src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Register src, int size) {
    // Memory-destination form: operands are passed to arithmetic_op swapped.
    arithmetic_op(0x31, src, dst, size);
  }
2481
  // Most BMI instructions are similar.
  // Shared emitters for BMI1/BMI2 bit-manipulation instructions: |op| is the
  // opcode byte, |vreg| the extra (VEX-encoded) register operand and |rm| the
  // ModRM operand.  The 'q'/'l' suffix selects 64- vs 32-bit operand size;
  // |pp| is the mandatory SIMD prefix for the BMI2 forms.
  void bmi1q(byte op, Register reg, Register vreg, Register rm);
  void bmi1q(byte op, Register reg, Register vreg, const Operand& rm);
  void bmi1l(byte op, Register reg, Register vreg, Register rm);
  void bmi1l(byte op, Register reg, Register vreg, const Operand& rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
             const Operand& rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
             const Operand& rm);
2493
  friend class CodePatcher;
  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // code generation
  // Writes relocation records for the code being assembled.
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // NOTE(review): presumably the Code objects referenced by emitted
  // code-target operands, indexed by the emitted operand -- confirm against
  // the emit/RecordRelocInfo implementations.
  List< Handle<Code> > code_targets_;
2507};
2508
2509
// Helper class that ensures that there is enough space for generating
// instructions and relocation information.  The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace BASE_EMBEDDED {
 public:
  // Grows the assembler's buffer up front when it is near overflow, so the
  // instruction emitted while this object is live cannot run past the end.
  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
#ifdef DEBUG
    // Remember how much space was free so the destructor can measure usage.
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  // Checks that the bytes emitted during this object's lifetime fit within
  // the assembler's reserved gap (kGap).
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* assembler_;  // Not owned.
#ifdef DEBUG
  int space_before_;  // Free buffer space at construction time.
#endif
};
2536
2537}  // namespace internal
2538}  // namespace v8
2539
2540#endif  // V8_X64_ASSEMBLER_X64_H_
2541