/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
#define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_

#include <vector>

#include "base/arena_containers.h"
#include "base/bit_utils.h"
#include "base/macros.h"
#include "constants_x86_64.h"
#include "globals.h"
#include "managed_register_x86_64.h"
#include "offsets.h"
#include "utils/array_ref.h"
#include "utils/assembler.h"

namespace art {
namespace x86_64 {

// Encodes an immediate value for operands.
//
// Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
// to 32b.
//
// Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
// conversion rules in expressions regarding negation, especially size_t on 32b.
class Immediate : public ValueObject {
 public:
  explicit Immediate(int64_t value_in) : value_(value_in) {}

  int64_t value() const { return value_; }

  bool is_int8() const { return IsInt<8>(value_); }
  bool is_uint8() const { return IsUint<8>(value_); }
  bool is_int16() const { return IsInt<16>(value_); }
  bool is_uint16() const { return IsUint<16>(value_); }
  bool is_int32() const { return IsInt<32>(value_); }

 private:
  const int64_t value_;
};
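
// Illustrative usage sketch (not part of the API): an emitter typically
// checks an immediate's width before choosing an encoding, falling back to
// a true 64-bit form only when required.
//
//   Immediate imm(value);  // value is an int64_t.
//   if (imm.is_int8()) {
//     // 1-byte immediate encoding (sign-extended by the CPU).
//   } else if (imm.is_int32()) {
//     // 4-byte immediate encoding.
//   } else {
//     // Needs a full 64-bit immediate, e.g. movq with imm64.
//   }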


class Operand : public ValueObject {
 public:
  uint8_t mod() const {
    return (encoding_at(0) >> 6) & 3;
  }

  Register rm() const {
    return static_cast<Register>(encoding_at(0) & 7);
  }

  ScaleFactor scale() const {
    return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
  }

  Register index() const {
    return static_cast<Register>((encoding_at(1) >> 3) & 7);
  }

  Register base() const {
    return static_cast<Register>(encoding_at(1) & 7);
  }

  uint8_t rex() const {
    return rex_;
  }

  int8_t disp8() const {
    CHECK_GE(length_, 2);
    return static_cast<int8_t>(encoding_[length_ - 1]);
  }

  int32_t disp32() const {
    CHECK_GE(length_, 5);
    int32_t value;
    memcpy(&value, &encoding_[length_ - 4], sizeof(value));
    return value;
  }

  bool IsRegister(CpuRegister reg) const {
    return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
        && ((encoding_[0] & 0x07) == reg.LowBits())  // Register codes match.
        && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
  }

  AssemblerFixup* GetFixup() const {
    return fixup_;
  }

 protected:
  // Operand can be subclassed (e.g., Address).
  Operand() : rex_(0), length_(0), fixup_(nullptr) { }

  void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
    CHECK_EQ(mod_in & ~3, 0);
    if (rm_in.NeedsRex()) {
      rex_ |= 0x41;  // REX.000B
    }
    encoding_[0] = (mod_in << 6) | rm_in.LowBits();
    length_ = 1;
  }

  void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
    CHECK_EQ(length_, 1);
    CHECK_EQ(scale_in & ~3, 0);
    if (base_in.NeedsRex()) {
      rex_ |= 0x41;  // REX.000B
    }
    if (index_in.NeedsRex()) {
      rex_ |= 0x42;  // REX.00X0
    }
    encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
        static_cast<uint8_t>(base_in.LowBits());
    length_ = 2;
  }

  void SetDisp8(int8_t disp) {
    CHECK(length_ == 1 || length_ == 2);
    encoding_[length_++] = static_cast<uint8_t>(disp);
  }

  void SetDisp32(int32_t disp) {
    CHECK(length_ == 1 || length_ == 2);
    int disp_size = sizeof(disp);
    memmove(&encoding_[length_], &disp, disp_size);
    length_ += disp_size;
  }

  void SetFixup(AssemblerFixup* fixup) {
    fixup_ = fixup;
  }

 private:
  uint8_t rex_;
  uint8_t length_;
  uint8_t encoding_[6];
  AssemblerFixup* fixup_;

  explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }

  // Get the operand encoding byte at the given index.
  uint8_t encoding_at(int index_in) const {
    CHECK_GE(index_in, 0);
    CHECK_LT(index_in, length_);
    return encoding_[index_in];
  }

  friend class X86_64Assembler;
};
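
// For reference, encoding_[0] is a standard x86 ModRM byte (this is generic
// x86 background, not ART-specific):
//
//   bits 7-6: mod    bits 5-3: reg/opcode    bits 2-0: rm
//
// mod == 3 selects a register-direct operand; since the reg field is only
// merged in at emit time, IsRegister() can test (encoding_[0] & 0xF8) == 0xC0.
// The optional SIB byte at encoding_[1] splits the same way into scale
// (2 bits), index (3 bits) and base (3 bits), matching the scale(), index()
// and base() accessors above.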


class Address : public Operand {
 public:
  Address(CpuRegister base_in, int32_t disp) {
    Init(base_in, disp);
  }

  Address(CpuRegister base_in, Offset disp) {
    Init(base_in, disp.Int32Value());
  }

  Address(CpuRegister base_in, FrameOffset disp) {
    CHECK_EQ(base_in.AsRegister(), RSP);
    Init(CpuRegister(RSP), disp.Int32Value());
  }

  Address(CpuRegister base_in, MemberOffset disp) {
    Init(base_in, disp.Int32Value());
  }

  void Init(CpuRegister base_in, int32_t disp) {
    if (disp == 0 && base_in.LowBits() != RBP) {
      SetModRM(0, base_in);
      if (base_in.LowBits() == RSP) {
        SetSIB(TIMES_1, CpuRegister(RSP), base_in);
      }
    } else if (disp >= -128 && disp <= 127) {
      SetModRM(1, base_in);
      if (base_in.LowBits() == RSP) {
        SetSIB(TIMES_1, CpuRegister(RSP), base_in);
      }
      SetDisp8(disp);
    } else {
      SetModRM(2, base_in);
      if (base_in.LowBits() == RSP) {
        SetSIB(TIMES_1, CpuRegister(RSP), base_in);
      }
      SetDisp32(disp);
    }
  }
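
  // Worked examples of what Init() stores (illustrative; the instruction's
  // reg field is merged into the ModRM byte at emit time):
  //   Address(CpuRegister(RAX), 0)   -> ModRM 0x00           [rax]
  //   Address(CpuRegister(RAX), 8)   -> ModRM 0x40, disp8 8  [rax + 8]
  //   Address(CpuRegister(RAX), 512) -> ModRM 0x80, disp32   [rax + 512]
  // A base with low bits 100 (RSP, R12) always needs a SIB byte, and one with
  // low bits 101 (RBP, R13) cannot use mod=0; Init() special-cases both.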


  Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    SetModRM(0, CpuRegister(RSP));
    SetSIB(scale_in, index_in, CpuRegister(RBP));
    SetDisp32(disp);
  }

  Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    if (disp == 0 && base_in.LowBits() != RBP) {
      SetModRM(0, CpuRegister(RSP));
      SetSIB(scale_in, index_in, base_in);
    } else if (disp >= -128 && disp <= 127) {
      SetModRM(1, CpuRegister(RSP));
      SetSIB(scale_in, index_in, base_in);
      SetDisp8(disp);
    } else {
      SetModRM(2, CpuRegister(RSP));
      SetSIB(scale_in, index_in, base_in);
      SetDisp32(disp);
    }
  }

  // If no_rip is true, then the absolute address isn't RIP-relative.
  static Address Absolute(uintptr_t addr, bool no_rip = false) {
    Address result;
    if (no_rip) {
      result.SetModRM(0, CpuRegister(RSP));
      result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
      result.SetDisp32(addr);
    } else {
      // RIP addressing is done using RBP as the base register.
      // The value in RBP isn't used.  Instead the offset is added to RIP.
      result.SetModRM(0, CpuRegister(RBP));
      result.SetDisp32(addr);
    }
    return result;
  }

  // A RIP-relative address that will be fixed up later.
  static Address RIP(AssemblerFixup* fixup) {
    Address result;
    // RIP addressing is done using RBP as the base register.
    // The value in RBP isn't used.  Instead the offset is added to RIP.
    result.SetModRM(0, CpuRegister(RBP));
    result.SetDisp32(0);
    result.SetFixup(fixup);
    return result;
  }
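
  // Encoding note (standard x86-64 behavior): in 64-bit mode, mod=0 with
  // rm=101 (RBP) selects [RIP + disp32], which is what the RIP-relative
  // paths above rely on. With no_rip, Absolute() instead emits a SIB byte
  // with base=RBP and index=RSP (meaning "no index"), which encodes a plain
  // [disp32] absolute address.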

  // If no_rip is true, then the absolute address isn't RIP-relative.
  static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
    return Absolute(addr.Int32Value(), no_rip);
  }

 private:
  Address() {}
};


/**
 * Class to handle constant area values.
 */
class ConstantArea {
 public:
  explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}

  // Add a double to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddDouble(double v);

  // Add a float to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddFloat(float v);

  // Add an int32_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt32(int32_t v);

  // Add an int32_t to the end of the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AppendInt32(int32_t v);

  // Add an int64_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt64(int64_t v);

  size_t GetSize() const {
    return buffer_.size() * elem_size_;
  }

  ArrayRef<const int32_t> GetBuffer() const {
    return ArrayRef<const int32_t>(buffer_);
  }

 private:
  static constexpr size_t elem_size_ = sizeof(int32_t);
  ArenaVector<int32_t> buffer_;
};
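
// A minimal usage sketch (hypothetical; real callers live in the code
// generator): literals are registered first, and the returned offset is
// later combined with the constant area's final location to form a
// RIP-relative address.
//
//   size_t offset = assembler->AddDouble(3.14);  // offset within the area
//   // ... emit code referencing the literal via an Address fixed up to
//   // [RIP + disp32] once AddConstantArea() has placed the literals.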


// This is equivalent to the Label class, used in a slightly different context. We
// inherit the functionality of the Label class, but prevent unintended
// derived-to-base conversions by making the base class private.
class NearLabel : private Label {
 public:
  NearLabel() : Label() {}

  // Expose the Label routines that we need.
  using Label::Position;
  using Label::LinkPosition;
  using Label::IsBound;
  using Label::IsUnused;
  using Label::IsLinked;

 private:
  using Label::BindTo;
  using Label::LinkTo;

  friend class x86_64::X86_64Assembler;

  DISALLOW_COPY_AND_ASSIGN(NearLabel);
};


class X86_64Assembler FINAL : public Assembler {
 public:
  explicit X86_64Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
  virtual ~X86_64Assembler() {}

  /*
   * Emit Machine Instructions.
   */
  void call(CpuRegister reg);
  void call(const Address& address);
  void call(Label* label);

  void pushq(CpuRegister reg);
  void pushq(const Address& address);
  void pushq(const Immediate& imm);

  void popq(CpuRegister reg);
  void popq(const Address& address);

  void movq(CpuRegister dst, const Immediate& src);
  void movl(CpuRegister dst, const Immediate& src);
  void movq(CpuRegister dst, CpuRegister src);
  void movl(CpuRegister dst, CpuRegister src);

  void movntl(const Address& dst, CpuRegister src);
  void movntq(const Address& dst, CpuRegister src);

  void movq(CpuRegister dst, const Address& src);
  void movl(CpuRegister dst, const Address& src);
  void movq(const Address& dst, CpuRegister src);
  void movq(const Address& dst, const Immediate& imm);
  void movl(const Address& dst, CpuRegister src);
  void movl(const Address& dst, const Immediate& imm);

  void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
  void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
  void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);

  void movzxb(CpuRegister dst, CpuRegister src);
  void movzxb(CpuRegister dst, const Address& src);
  void movsxb(CpuRegister dst, CpuRegister src);
  void movsxb(CpuRegister dst, const Address& src);
  void movb(CpuRegister dst, const Address& src);
  void movb(const Address& dst, CpuRegister src);
  void movb(const Address& dst, const Immediate& imm);

  void movzxw(CpuRegister dst, CpuRegister src);
  void movzxw(CpuRegister dst, const Address& src);
  void movsxw(CpuRegister dst, CpuRegister src);
  void movsxw(CpuRegister dst, const Address& src);
  void movw(CpuRegister dst, const Address& src);
  void movw(const Address& dst, CpuRegister src);
  void movw(const Address& dst, const Immediate& imm);

  void leaq(CpuRegister dst, const Address& src);
  void leal(CpuRegister dst, const Address& src);

  void movaps(XmmRegister dst, XmmRegister src);

  void movss(XmmRegister dst, const Address& src);
  void movss(const Address& dst, XmmRegister src);
  void movss(XmmRegister dst, XmmRegister src);

  void movsxd(CpuRegister dst, CpuRegister src);
  void movsxd(CpuRegister dst, const Address& src);

  void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
  void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
  void movd(XmmRegister dst, CpuRegister src, bool is64bit);
  void movd(CpuRegister dst, XmmRegister src, bool is64bit);

  void addss(XmmRegister dst, XmmRegister src);
  void addss(XmmRegister dst, const Address& src);
  void subss(XmmRegister dst, XmmRegister src);
  void subss(XmmRegister dst, const Address& src);
  void mulss(XmmRegister dst, XmmRegister src);
  void mulss(XmmRegister dst, const Address& src);
  void divss(XmmRegister dst, XmmRegister src);
  void divss(XmmRegister dst, const Address& src);

  void movsd(XmmRegister dst, const Address& src);
  void movsd(const Address& dst, XmmRegister src);
  void movsd(XmmRegister dst, XmmRegister src);

  void addsd(XmmRegister dst, XmmRegister src);
  void addsd(XmmRegister dst, const Address& src);
  void subsd(XmmRegister dst, XmmRegister src);
  void subsd(XmmRegister dst, const Address& src);
  void mulsd(XmmRegister dst, XmmRegister src);
  void mulsd(XmmRegister dst, const Address& src);
  void divsd(XmmRegister dst, XmmRegister src);
  void divsd(XmmRegister dst, const Address& src);

  void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
  void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
  void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
  void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
  void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
  void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);

  void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvtss2sd(XmmRegister dst, XmmRegister src);
  void cvtss2sd(XmmRegister dst, const Address& src);

  void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvtsd2ss(XmmRegister dst, XmmRegister src);
  void cvtsd2ss(XmmRegister dst, const Address& src);

  void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
  void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);

  void cvtdq2pd(XmmRegister dst, XmmRegister src);

  void comiss(XmmRegister a, XmmRegister b);
  void comiss(XmmRegister a, const Address& b);
  void comisd(XmmRegister a, XmmRegister b);
  void comisd(XmmRegister a, const Address& b);
  void ucomiss(XmmRegister a, XmmRegister b);
  void ucomiss(XmmRegister a, const Address& b);
  void ucomisd(XmmRegister a, XmmRegister b);
  void ucomisd(XmmRegister a, const Address& b);

  void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
  void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);

  void sqrtsd(XmmRegister dst, XmmRegister src);
  void sqrtss(XmmRegister dst, XmmRegister src);

  void xorpd(XmmRegister dst, const Address& src);
  void xorpd(XmmRegister dst, XmmRegister src);
  void xorps(XmmRegister dst, const Address& src);
  void xorps(XmmRegister dst, XmmRegister src);

  void andpd(XmmRegister dst, const Address& src);
  void andpd(XmmRegister dst, XmmRegister src);
  void andps(XmmRegister dst, XmmRegister src);

  void orpd(XmmRegister dst, XmmRegister src);
  void orps(XmmRegister dst, XmmRegister src);

  void flds(const Address& src);
  void fstps(const Address& dst);
  void fsts(const Address& dst);

  void fldl(const Address& src);
  void fstpl(const Address& dst);
  void fstl(const Address& dst);

  void fstsw();

  void fucompp();

  void fnstcw(const Address& dst);
  void fldcw(const Address& src);

  void fistpl(const Address& dst);
  void fistps(const Address& dst);
  void fildl(const Address& src);
  void filds(const Address& src);

  void fincstp();
  void ffree(const Immediate& index);

  void fsin();
  void fcos();
  void fptan();
  void fprem();

  void xchgl(CpuRegister dst, CpuRegister src);
  void xchgq(CpuRegister dst, CpuRegister src);
  void xchgl(CpuRegister reg, const Address& address);

  void cmpw(const Address& address, const Immediate& imm);

  void cmpl(CpuRegister reg, const Immediate& imm);
  void cmpl(CpuRegister reg0, CpuRegister reg1);
  void cmpl(CpuRegister reg, const Address& address);
  void cmpl(const Address& address, CpuRegister reg);
  void cmpl(const Address& address, const Immediate& imm);

  void cmpq(CpuRegister reg0, CpuRegister reg1);
  void cmpq(CpuRegister reg0, const Immediate& imm);
  void cmpq(CpuRegister reg0, const Address& address);
  void cmpq(const Address& address, const Immediate& imm);

  void testl(CpuRegister reg1, CpuRegister reg2);
  void testl(CpuRegister reg, const Address& address);
  void testl(CpuRegister reg, const Immediate& imm);

  void testq(CpuRegister reg1, CpuRegister reg2);
  void testq(CpuRegister reg, const Address& address);

  void andl(CpuRegister dst, const Immediate& imm);
  void andl(CpuRegister dst, CpuRegister src);
  void andl(CpuRegister reg, const Address& address);
  void andq(CpuRegister dst, const Immediate& imm);
  void andq(CpuRegister dst, CpuRegister src);
  void andq(CpuRegister reg, const Address& address);

  void orl(CpuRegister dst, const Immediate& imm);
  void orl(CpuRegister dst, CpuRegister src);
  void orl(CpuRegister reg, const Address& address);
  void orq(CpuRegister dst, CpuRegister src);
  void orq(CpuRegister dst, const Immediate& imm);
  void orq(CpuRegister reg, const Address& address);

  void xorl(CpuRegister dst, CpuRegister src);
  void xorl(CpuRegister dst, const Immediate& imm);
  void xorl(CpuRegister reg, const Address& address);
  void xorq(CpuRegister dst, const Immediate& imm);
  void xorq(CpuRegister dst, CpuRegister src);
  void xorq(CpuRegister reg, const Address& address);

  void addl(CpuRegister dst, CpuRegister src);
  void addl(CpuRegister reg, const Immediate& imm);
  void addl(CpuRegister reg, const Address& address);
  void addl(const Address& address, CpuRegister reg);
  void addl(const Address& address, const Immediate& imm);

  void addq(CpuRegister reg, const Immediate& imm);
  void addq(CpuRegister dst, CpuRegister src);
  void addq(CpuRegister dst, const Address& address);

  void subl(CpuRegister dst, CpuRegister src);
  void subl(CpuRegister reg, const Immediate& imm);
  void subl(CpuRegister reg, const Address& address);

  void subq(CpuRegister reg, const Immediate& imm);
  void subq(CpuRegister dst, CpuRegister src);
  void subq(CpuRegister dst, const Address& address);

  void cdq();
  void cqo();

  void idivl(CpuRegister reg);
  void idivq(CpuRegister reg);

  void imull(CpuRegister dst, CpuRegister src);
  void imull(CpuRegister reg, const Immediate& imm);
  void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
  void imull(CpuRegister reg, const Address& address);

  void imulq(CpuRegister src);
  void imulq(CpuRegister dst, CpuRegister src);
  void imulq(CpuRegister reg, const Immediate& imm);
  void imulq(CpuRegister reg, const Address& address);
  void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);

  void imull(CpuRegister reg);
  void imull(const Address& address);

  void mull(CpuRegister reg);
  void mull(const Address& address);

  void shll(CpuRegister reg, const Immediate& imm);
  void shll(CpuRegister operand, CpuRegister shifter);
  void shrl(CpuRegister reg, const Immediate& imm);
  void shrl(CpuRegister operand, CpuRegister shifter);
  void sarl(CpuRegister reg, const Immediate& imm);
  void sarl(CpuRegister operand, CpuRegister shifter);

  void shlq(CpuRegister reg, const Immediate& imm);
  void shlq(CpuRegister operand, CpuRegister shifter);
  void shrq(CpuRegister reg, const Immediate& imm);
  void shrq(CpuRegister operand, CpuRegister shifter);
  void sarq(CpuRegister reg, const Immediate& imm);
  void sarq(CpuRegister operand, CpuRegister shifter);

  void negl(CpuRegister reg);
  void negq(CpuRegister reg);

  void notl(CpuRegister reg);
  void notq(CpuRegister reg);

  void enter(const Immediate& imm);
  void leave();

  void ret();
  void ret(const Immediate& imm);

  void nop();
  void int3();
  void hlt();

  void j(Condition condition, Label* label);
  void j(Condition condition, NearLabel* label);
  void jrcxz(NearLabel* label);

  void jmp(CpuRegister reg);
  void jmp(const Address& address);
  void jmp(Label* label);
  void jmp(NearLabel* label);

  X86_64Assembler* lock();
  void cmpxchgl(const Address& address, CpuRegister reg);
  void cmpxchgq(const Address& address, CpuRegister reg);

  void mfence();

  X86_64Assembler* gs();

  void setcc(Condition condition, CpuRegister dst);

  void bswapl(CpuRegister dst);
  void bswapq(CpuRegister dst);

  void bsfl(CpuRegister dst, CpuRegister src);
  void bsfl(CpuRegister dst, const Address& src);
  void bsfq(CpuRegister dst, CpuRegister src);
  void bsfq(CpuRegister dst, const Address& src);

  void bsrl(CpuRegister dst, CpuRegister src);
  void bsrl(CpuRegister dst, const Address& src);
  void bsrq(CpuRegister dst, CpuRegister src);
  void bsrq(CpuRegister dst, const Address& src);

  void popcntl(CpuRegister dst, CpuRegister src);
  void popcntl(CpuRegister dst, const Address& src);
  void popcntq(CpuRegister dst, CpuRegister src);
  void popcntq(CpuRegister dst, const Address& src);

  void rorl(CpuRegister reg, const Immediate& imm);
  void rorl(CpuRegister operand, CpuRegister shifter);
  void roll(CpuRegister reg, const Immediate& imm);
  void roll(CpuRegister operand, CpuRegister shifter);

  void rorq(CpuRegister reg, const Immediate& imm);
  void rorq(CpuRegister operand, CpuRegister shifter);
  void rolq(CpuRegister reg, const Immediate& imm);
  void rolq(CpuRegister operand, CpuRegister shifter);

  void repne_scasw();
  void repe_cmpsw();
  void repe_cmpsl();
  void repe_cmpsq();
  void rep_movsw();

  //
  // Macros for High-level operations.
  //

  void AddImmediate(CpuRegister reg, const Immediate& imm);

  void LoadDoubleConstant(XmmRegister dst, double value);

  void LockCmpxchgl(const Address& address, CpuRegister reg) {
    lock()->cmpxchgl(address, reg);
  }

  void LockCmpxchgq(const Address& address, CpuRegister reg) {
    lock()->cmpxchgq(address, reg);
  }
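
  // Note on the pattern above: lock() emits the x86 LOCK prefix byte (0xF0)
  // and returns this assembler, so a locked instruction is written as a
  // chained call:
  //
  //   assembler->lock()->cmpxchgl(address, reg);  // lock cmpxchg [mem], reg
  //
  // The prefix applies to the next instruction emitted into the buffer.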

  //
  // Misc. functionality
  //
  int PreferredLoopAlignment() { return 16; }
  void Align(int alignment, int offset);
  void Bind(Label* label) OVERRIDE;
  void Jump(Label* label) OVERRIDE {
    jmp(label);
  }
  void Bind(NearLabel* label);

  //
  // Overridden common assembler high-level functionality
  //

  // Emit code that will create an activation on the stack
  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                  const std::vector<ManagedRegister>& callee_save_regs,
                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;

  // Emit code that will remove an activation from the stack
  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
      OVERRIDE;

  void IncreaseFrameSize(size_t adjust) OVERRIDE;
  void DecreaseFrameSize(size_t adjust) OVERRIDE;

  // Store routines
  void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE;
  void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
  void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;

  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;

  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch)
      OVERRIDE;

  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
                                  ManagedRegister scratch) OVERRIDE;

  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;

  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
                     ManagedRegister scratch) OVERRIDE;

  // Load routines
  void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;

  void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE;

  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;

  void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
               bool unpoison_reference) OVERRIDE;

  void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;

  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE;

  // Copying routines
  void Move(ManagedRegister dest, ManagedRegister src, size_t size);

  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
                              ManagedRegister scratch) OVERRIDE;

  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
      OVERRIDE;

  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;

  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;

  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
            size_t size) OVERRIDE;

  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
            size_t size) OVERRIDE;

  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
            size_t size) OVERRIDE;

  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
            ManagedRegister scratch, size_t size) OVERRIDE;

  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
            ManagedRegister scratch, size_t size) OVERRIDE;

  void MemoryBarrier(ManagedRegister) OVERRIDE;

  // Sign extension
  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;

  // Zero extension
  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;

  // Exploit fast access in managed code to Thread::Current()
  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE;
  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
  // value is null and null_allowed. in_reg holds a possibly stale reference
  // that can be used to avoid loading the handle scope entry to see if the value is
  // null.
  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;

  // Set up out_off to hold an Object** into the handle scope, or to be null if the
  // value is null and null_allowed.
  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
                              ManagedRegister scratch, bool null_allowed) OVERRIDE;

  // src holds a handle scope entry (Object**); load this into dst.
  virtual void LoadReferenceFromHandleScope(ManagedRegister dst,
                                            ManagedRegister src);

  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
  // know that src cannot be null.
  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;

  // Call to address held at [base+offset]
  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE;

  // Generate code to check if Thread::Current()->exception_ is non-null
  // and branch to an ExceptionSlowPath if it is.
  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;

  // Add a double to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddDouble(double v) { return constant_area_.AddDouble(v); }

  // Add a float to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }

  // Add an int32_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt32(int32_t v) {
    return constant_area_.AddInt32(v);
  }

  // Add an int32_t to the end of the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AppendInt32(int32_t v) {
    return constant_area_.AppendInt32(v);
  }

  // Add an int64_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }

  // Add the contents of the constant area to the assembler buffer.
  void AddConstantArea();

  // Is the constant area empty? Return true if there are no literals in the constant area.
  bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }

  // Return the current size of the constant area.
  size_t ConstantAreaSize() const { return constant_area_.GetSize(); }

  //
  // Heap poisoning.
  //

  // Poison a heap reference contained in `reg`.
  void PoisonHeapReference(CpuRegister reg) { negl(reg); }
  // Unpoison a heap reference contained in `reg`.
  void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
  void MaybeUnpoisonHeapReference(CpuRegister reg) {
    if (kPoisonHeapReferences) {
      UnpoisonHeapReference(reg);
    }
  }
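
  // Because poisoning is 32-bit negation, it is its own inverse: poisoning a
  // reference twice restores the original value. For example:
  //
  //   PoisonHeapReference(reg);    // reg = -reg (poisoned)
  //   UnpoisonHeapReference(reg);  // reg = -reg (original value again)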

 private:
  void EmitUint8(uint8_t value);
  void EmitInt32(int32_t value);
  void EmitInt64(int64_t value);
  void EmitRegisterOperand(uint8_t rm, uint8_t reg);
  void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
  void EmitFixup(AssemblerFixup* fixup);
  void EmitOperandSizeOverride();

  void EmitOperand(uint8_t rm, const Operand& operand);
  void EmitImmediate(const Immediate& imm);
  void EmitComplex(uint8_t rm, const Operand& operand, const Immediate& immediate);
  void EmitLabel(Label* label, int instruction_size);
  void EmitLabelLink(Label* label);
  void EmitLabelLink(NearLabel* label);

  void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
  void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);

  // If any input is true, output the necessary REX prefix.
  void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);

  // Emit a REX prefix byte if necessary for reg, i.e. if reg is a register in the range R8 to R15.
  void EmitOptionalRex32(CpuRegister reg);
  void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
  void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
  void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
  void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
  void EmitOptionalRex32(const Operand& operand);
  void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
  void EmitOptionalRex32(XmmRegister dst, const Operand& operand);

  // Emit a REX.W prefix plus necessary register bit encodings.
  void EmitRex64();
  void EmitRex64(CpuRegister reg);
  void EmitRex64(const Operand& operand);
  void EmitRex64(CpuRegister dst, CpuRegister src);
  void EmitRex64(CpuRegister dst, const Operand& operand);
  void EmitRex64(XmmRegister dst, const Operand& operand);
  void EmitRex64(XmmRegister dst, CpuRegister src);
  void EmitRex64(CpuRegister dst, XmmRegister src);

  // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
  void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
  void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
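
  // For reference, the REX prefix emitted by the helpers above has the
  // standard layout 0b0100WRXB: W selects 64-bit operand size, R extends the
  // ModRM reg field, X extends the SIB index field, and B extends the ModRM
  // rm field (or the SIB base). This matches the REX.000B (0x41) and
  // REX.00X0 (0x42) constants used by Operand::SetModRM() and SetSIB().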

  ConstantArea constant_area_;

  DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
};

inline void X86_64Assembler::EmitUint8(uint8_t value) {
  buffer_.Emit<uint8_t>(value);
}

inline void X86_64Assembler::EmitInt32(int32_t value) {
  buffer_.Emit<int32_t>(value);
}

inline void X86_64Assembler::EmitInt64(int64_t value) {
  // Write this 64-bit value as two 32-bit words for alignment reasons
  // (this is essential when cross-compiling on ARM hosts, which do not
  // allow 64-bit unaligned accesses).  We assume little-endianness here.
  EmitInt32(Low32Bits(value));
  EmitInt32(High32Bits(value));
}

inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
  CHECK_GE(rm, 0);
  CHECK_LT(rm, 8);
  buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
}

inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
  EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
}

inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
  buffer_.EmitFixup(fixup);
}

inline void X86_64Assembler::EmitOperandSizeOverride() {
  EmitUint8(0x66);
}

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_