target_x86.cc revision 9ee801f5308aa3c62ae3bedae2658612762ffb91
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr_32[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
};
static const RegStorage core_regs_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
#endif
};
static const RegStorage sp_regs_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static const RegStorage sp_regs_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_regs_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static const RegStorage dp_regs_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};
static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage core_temps_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11
#endif
};
static const RegStorage sp_temps_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static const RegStorage sp_temps_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_temps_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static const RegStorage dp_temps_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};

static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
    core_regs_arr_32 + arraysize(core_regs_arr_32));
static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
    core_regs_arr_64 + arraysize(core_regs_arr_64));
static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
    sp_regs_arr_32 + arraysize(sp_regs_arr_32));
static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
    sp_regs_arr_64 + arraysize(sp_regs_arr_64));
static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
    dp_regs_arr_32 + arraysize(dp_regs_arr_32));
static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
    dp_regs_arr_64 + arraysize(dp_regs_arr_64));
static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
    reserved_regs_arr_32 + arraysize(reserved_regs_arr_32));
static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
    reserved_regs_arr_64 + arraysize(reserved_regs_arr_64));
static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
    core_temps_arr_32 + arraysize(core_temps_arr_32));
static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
    core_temps_arr_64 + arraysize(core_temps_arr_64));
static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
    sp_temps_arr_32 + arraysize(sp_temps_arr_32));
static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
    sp_temps_arr_64 + arraysize(sp_temps_arr_64));
static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
    dp_temps_arr_32 + arraysize(dp_temps_arr_32));
static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
    dp_temps_arr_64 + arraysize(dp_temps_arr_64));

RegStorage rs_rX86_SP;

X86NativeRegisterPool rX86_ARG0;
X86NativeRegisterPool rX86_ARG1;
X86NativeRegisterPool rX86_ARG2;
X86NativeRegisterPool rX86_ARG3;
X86NativeRegisterPool rX86_FARG0;
X86NativeRegisterPool rX86_FARG1;
X86NativeRegisterPool rX86_FARG2;
X86NativeRegisterPool rX86_FARG3;
X86NativeRegisterPool rX86_RET0;
X86NativeRegisterPool rX86_RET1;
X86NativeRegisterPool rX86_INVOKE_TGT;
X86NativeRegisterPool rX86_COUNT;

RegStorage rs_rX86_ARG0;
RegStorage rs_rX86_ARG1;
RegStorage rs_rX86_ARG2;
RegStorage rs_rX86_ARG3;
RegStorage rs_rX86_FARG0;
RegStorage rs_rX86_FARG1;
RegStorage rs_rX86_FARG2;
RegStorage rs_rX86_FARG3;
RegStorage rs_rX86_RET0;
RegStorage rs_rX86_RET1;
RegStorage rs_rX86_INVOKE_TGT;
RegStorage rs_rX86_COUNT;

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}


RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on new internal ABI.
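  // Note (assumption, based on the quick internal ABI): the first managed
  // argument maps to ARG1 rather than ARG0 because ARG0 is reserved for the
  // ArtMethod* of the callee.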
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}

/*
 * Decode the register id.
 */
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  uint64_t seed;
  int shift;
  int reg_id;

  reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register */
  seed = 1;
  /* FP register starts at bit position 16 */
  shift = reg.IsFloat() ? kX86FPReg0 : 0;
  /* Expand the double register id into single offset */
  shift += reg_id;
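  // Example (illustrative): rDX (reg_num 2, core) -> bit 2, while xmm3 ->
  // bit (kX86FPReg0 + 3), i.e. bit 19 given the bit-16 FP base noted above.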
  return (seed << shift);
}

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: it might make sense to use a virtual resource encoding bit for pc,
   * which could clean up some of the x86 vs. ARM/MIPS differences.
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fixup for an instruction that is hard to describe with flags:
  // kX86RepneScasw uses rAX, rCX and rDI, and sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a human-readable instruction string.
 * See the format key in Assemble.cc.
 */
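// Example (illustrative): with lir->operands = {2, 42}, a format such as
// "!0r,!1d" expands to "rdx,42" ('r' prints a register name, 'd' a decimal).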
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            // Note: bounds check against the element count, not sizeof the array.
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // x86 has no LR, but the fake return-address register is still counted as a
  // core spill so that frame layout stays consistent.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.  Note that
 * vpush/vpop uses contiguous register lists so we must
 * include any holes in the mask.  Associate holes with
 * Dalvik register INVALID_VREG (0xFFFFU).
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg8bM:
    case kX86LockCmpxchg8bA:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide a full barrier.
      return true;
    default:
      break;
  }

  // Be conservative if we cannot prove that the instruction provides a full barrier.
  return false;
}

void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off using the last LIR as the barrier. If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  /*
   * According to the JSR-133 Cookbook, on x86 only StoreLoad barriers need a memory fence.
   * All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory
   * model. For those cases, all we need to ensure is that there is a scheduling barrier in
   * place.
   */
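  // Example (illustrative): if the preceding LIR is a kX86LockCmpxchgMR it
  // already acts as a full StoreLoad barrier and no mfence is needed; after a
  // plain mov, ProvidesFullMemoryBarrier() fails and we emit an mfence below.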
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
      mem_barrier = NewLIR0(kX86Mfence);
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
#endif
}

// Alloc a pair of core registers, or a double.
RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempDouble();
  }
  RegStorage low_reg = AllocTemp();
  RegStorage high_reg = AllocTemp();
  return RegStorage::MakeRegPair(low_reg, high_reg);
}

RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempSingle();
  }
  return AllocTemp();
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  if (Gen64Bit()) {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, sp_regs_64, dp_regs_64,
                                          reserved_regs_64, core_temps_64, sp_temps_64,
                                          dp_temps_64);
  } else {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, sp_regs_32, dp_regs_32,
                                          reserved_regs_32, core_temps_32, sp_temps_32,
                                          dp_temps_32);
  }

  // Target-specific adjustments.

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // 64-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
    // Redirect 32-bit vector's master storage to 64-bit vector.
    info->SetMaster(dp_reg_info);
  }
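
  // After this aliasing, allocating e.g. fr2 as a single conflicts with dr2
  // (and vice versa), since both name the same physical xmm register.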

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
  DCHECK(rl_keep.wide);
  DCHECK(rl_free.wide);
  int free_low = rl_free.reg.GetLowReg();
  int free_high = rl_free.reg.GetHighReg();
  int keep_low = rl_keep.reg.GetLowReg();
  int keep_high = rl_keep.reg.GetHighReg();
  if ((free_low != keep_low) && (free_low != keep_high) &&
      (free_high != keep_low) && (free_high != keep_high)) {
    // No overlap, free both
    FreeTemp(rl_free.reg);
  }
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
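  // Example (illustrative): a residual mask of 0b0101 spills rAX and rDX to
  // consecutive pointer-sized slots starting at 'offset'.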
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  if (UNLIKELY(is_volatile)) {
    // On x86, atomic 64-bit load/store requires an fp register.
    // Smaller aligned load/store is atomic for both core and fp registers.
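    // (Assumption: this relies on aligned 8-byte SSE loads/stores being
    // atomic on the CPUs that ART targets.)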
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit) {
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
            << " is wrong: expecting " << i << ", seeing "
            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
  if (Gen64Bit()) {
    rs_rX86_SP = rs_rX86_SP_64;

    rs_rX86_ARG0 = rs_rDI;
    rs_rX86_ARG1 = rs_rSI;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rCX;
    rX86_ARG0 = rDI;
    rX86_ARG1 = rSI;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rCX;
    // TODO: ARG4(r8), ARG5(r9), floating point args.
  } else {
    rs_rX86_SP = rs_rX86_SP_32;

    rs_rX86_ARG0 = rs_rAX;
    rs_rX86_ARG1 = rs_rCX;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rBX;
    rX86_ARG0 = rAX;
    rX86_ARG1 = rCX;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rBX;
  }
  rs_rX86_FARG0 = rs_rAX;
  rs_rX86_FARG1 = rs_rCX;
  rs_rX86_FARG2 = rs_rDX;
  rs_rX86_FARG3 = rs_rBX;
  rs_rX86_RET0 = rs_rAX;
  rs_rX86_RET1 = rs_rDX;
  rs_rX86_INVOKE_TGT = rs_rAX;
  rs_rX86_COUNT = rs_rCX;
  rX86_FARG0 = rAX;
  rX86_FARG1 = rCX;
  rX86_FARG2 = rDX;
  rX86_FARG3 = rBX;
  rX86_RET0 = rAX;
  rX86_RET1 = rDX;
  rX86_INVOKE_TGT = rAX;
  rX86_COUNT = rCX;
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena, false);
}

Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                             ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena, true);
}

// Not used in x86
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

// Not used in x86
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
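  // If the destination lives in the Dalvik frame or a compiler temp, write the
  // two 32-bit halves straight to memory and avoid tying up a register pair.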
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    LIR* store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
            << (loc.wide ? " w" : "  ")
            << (loc.defined ? " D" : "  ")
            << (loc.is_const ? " c" : "  ")
            << (loc.fp ? " F" : "  ")
            << (loc.core ? " C" : "  ")
            << (loc.ref ? " r" : "  ")
            << (loc.high_word ? " h" : "  ")
            << (loc.home ? " H" : "  ")
            << ", low: " << static_cast<int>(loc.reg.GetLowReg())
            << ", high: " << static_cast<int>(loc.reg.GetHighReg())
            << ", s_reg: " << loc.s_reg_low
            << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save index and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR* X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32 bit call relative instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR* call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
    LIR* p = method_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                         cu_->method_idx, cu_->invoke_type,
                                         target_method_idx, target_dex_file,
                                         static_cast<InvokeType>(p->operands[4]),
                                         patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
    LIR* p = class_type_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                        cu_->method_idx, target_method_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
    LIR* p = call_method_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86CallI);
    uint32_t target_method_idx = p->operands[1];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                               cu_->method_idx, cu_->invoke_type,
                                               target_method_idx, target_dex_file,
                                               static_cast<InvokeType>(p->operands[3]),
                                               patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast String.indexOf(I) & (II).  Inline check for the simple case of char <= 0xFFFF;
 * otherwise bail to the standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers.

  // EAX: 16 bit character being searched for.
  // ECX: count: number of words to be searched.
  // EDI: String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the (III) flavor of indexOf.

  uint32_t char_value =
    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-NULL?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // Character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within the data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search into rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR* length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use the special JECXZ instruction.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLocTyped(rl_start, kCoreReg);
      if (rl_start.location == kLocPhysReg) {
        // Handle the "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // Special case: we will clobber EDI below, so save the start index on the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from the stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Push the start index onto the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);
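  // i.e. rBX = rDI + rBX * 2 + data_offset (per the address formula above);
  // the stored offset is in 16-bit chars, hence the scale by 2.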

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from the stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched.  Compute the index of the result.
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR* all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR* not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out of line code returns here.
  if (slowpath_branch != nullptr) {
    LIR* return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Append a 32-bit quantity to the FDE buffer.
 * @param buf FDE buffer.
 * @param data Data value.
 */
static void PushWord(std::vector<uint8_t>& buf, int data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

/*
 * @brief Append an 'advance LOC' instruction to the FDE buffer.
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta.
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}
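
// Examples (per the DWARF encoding): an increment of 5 encodes as the single
// byte 0x45 (DW_CFA_advance_loc | 5); 300 encodes as 0x03 0x2C 0x01
// (DW_CFA_advance_loc2 with a little-endian 16-bit delta).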

std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
  cfi_info->push_back(0x7C);
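  // (-4 as a one-byte signed LEB128 is 0x7C.)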

  // Return address register (R8).
  cfi_info->push_back(0x08);

  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);
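  // (0x2 << 6 | 0x08 is DW_CFA_offset for register 8; the operand 1 scaled by
  // the -4 data alignment places the return address at CFA - 4.)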

  // And 2 no-ops to align to a 4 byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t* ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t* p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}
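
// Example (standard ULEB128): EncodeUnsignedLeb128(buf, 624485) appends the
// bytes 0xE5 0x8E 0x26.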

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // The stack bump was followed by a RET instruction.
      LIR* post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Pad to a multiple of 4 bytes.
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes.
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

}  // namespace art