target_x86.cc revision 091cc408e9dc87e60fb64c61e186bea568fc3d3a
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
#endif
};
static const RegStorage sp_regs_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_regs_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};
static const RegStorage reserved_regs_arr[] = {rs_rX86_SP};
static const RegStorage core_temps_arr[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage sp_temps_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_temps_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};

static const std::vector<RegStorage> core_regs(core_regs_arr,
    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
static const std::vector<RegStorage> sp_regs(sp_regs_arr,
    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
static const std::vector<RegStorage> dp_regs(dp_regs_arr,
    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
static const std::vector<RegStorage> core_temps(core_temps_arr,
    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
static const std::vector<RegStorage> sp_temps(sp_temps_arr,
    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
static const std::vector<RegStorage> dp_temps(dp_temps_arr,
    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on the new internal ABI.
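  // Note (assumption, not stated in this file): the mapping below starts at
  // kArg1 because the first argument register is presumed to carry the method
  // reference itself, so Dalvik argument 0 lands in the second register.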
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}

/*
 * Decode the register id.
 */
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  uint64_t seed;
  int shift;
  int reg_id;

  reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register */
  seed = 1;
  /* FP register starts at bit position 16 */
  shift = reg.IsFloat() ? kX86FPReg0 : 0;
  /* Expand the double register id into single offset */
  shift += reg_id;
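  // Worked example (register numbering assumed from x86_lir.h): core register
  // rDX (reg_id 2) yields mask bit 2, while an FP register with reg_id 2
  // yields bit kX86FPReg0 + 2 (bit 18, since FP registers start at bit 16),
  // so core and FP registers occupy disjoint ranges of the mask.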
  return (seed << shift);
}

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
   * able to clean up some of the x86/Arm_Mips differences.
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fixup for an instruction whose effects are hard to describe with flags:
  // kX86RepneScasw uses rAX, rCX and rDI, and sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build the decoded instruction string for lir.
 * See the format key in Assemble.cc.
 */
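// For example (hypothetical fmt string; see Assemble.cc for the real ones):
// "!0r,!1d" would print operand 0 as a register name via the 'r' case below
// and operand 1 as a decimal immediate via the 'd' case.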
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // x86 has no link register, so there is no LR spill to adjust for; we do,
  // however, count the return address pushed by the call as a fake core spill.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.  Note that
 * vpush/vpop uses contiguous register lists so we must
 * include any holes in the mask.  Associate holes with
 * Dalvik register INVALID_VREG (0xFFFFU).
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
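  // On x86, lock-prefixed read-modify-write instructions and xchg with a
  // memory operand (which carries an implicit lock prefix) serialize memory
  // accesses, so they double as full barriers.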
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg8bM:
    case kX86LockCmpxchg8bA:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide a full barrier.
      return true;
    default:
      break;
  }

  // Be conservative: if we cannot prove that an instruction provides a full barrier,
  // assume that it does not.
  return false;
}

void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off using the last LIR as the barrier. If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  /*
   * According to the JSR-133 Cookbook, on x86 only StoreLoad barriers need a memory fence.
   * All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory
   * model, so for those all we need to ensure is that a scheduling barrier is in place.
   */
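  // Illustrative scenario (assumed, not spelled out in this file): a volatile
  // store is typically a plain mov followed by a kStoreLoad barrier; since mov
  // provides no fence, the check below appends an mfence after it.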
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
      mem_barrier = NewLIR0(kX86Mfence);
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
#endif
}

// Alloc a pair of core registers, or a double.
RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempDouble();
  }
  RegStorage low_reg = AllocTemp();
  RegStorage high_reg = AllocTemp();
  return RegStorage::MakeRegPair(low_reg, high_reg);
}

RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempSingle();
  }
  return AllocTemp();
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
                                        core_temps, sp_temps, dp_temps);

  // Target-specific adjustments.

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // 64-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
    // Redirect 32-bit vector's master storage to 64-bit vector.
    info->SetMaster(dp_reg_info);
  }
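  // After this loop, a 32-bit xmm view and its 64-bit container share one
  // master RegisterInfo record, so allocating either view marks both in use.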

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
  DCHECK(rl_keep.wide);
  DCHECK(rl_free.wide);
  int free_low = rl_free.reg.GetLowReg();
  int free_high = rl_free.reg.GetHighReg();
  int keep_low = rl_keep.reg.GetLowReg();
  int keep_high = rl_keep.reg.GetHighReg();
  if ((free_low != keep_low) && (free_low != keep_high) &&
      (free_high != keep_low) && (free_high != keep_high)) {
    // No overlap; free both.
    FreeTemp(rl_free.reg);
  }
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask, not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
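  // Walk the mask bit by bit; e.g. a mask of 0b101 stores the registers
  // numbered 0 and 2 into consecutive 4-byte slots starting at offset.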
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask, not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr) {
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
            << " is wrong: expecting " << i << ", seeing "
            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena);
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
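    // E.g. (illustrative) value 0x123456789ABCDEF0 splits into val_lo
    // 0x9ABCDEF0 and val_hi 0x12345678, stored by the two 32-bit moves below.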
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    LIR* store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
             << (loc.wide ? " w" : "  ")
             << (loc.defined ? " D" : "  ")
             << (loc.is_const ? " c" : "  ")
             << (loc.fp ? " F" : "  ")
             << (loc.core ? " C" : "  ")
             << (loc.ref ? " r" : "  ")
             << (loc.high_word ? " h" : "  ")
             << (loc.home ? " H" : "  ")
             << ", low: " << static_cast<int>(loc.reg.GetLowReg())
             << ", high: " << static_cast<int>(loc.reg.GetHighReg())
             << ", s_reg: " << loc.s_reg_low
             << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save index and type.
  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32 bit call relative instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
      LIR* p = method_address_insns_.Get(i);
      DCHECK_EQ(p->opcode, kX86Mov32RI);
      uint32_t target_method_idx = p->operands[2];
      const DexFile* target_dex_file =
          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

      // The offset to patch is the last 4 bytes of the instruction.
      int patch_offset = p->offset + p->flags.size - 4;
      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                           cu_->method_idx, cu_->invoke_type,
                                           target_method_idx, target_dex_file,
                                           static_cast<InvokeType>(p->operands[4]),
                                           patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
      LIR* p = class_type_address_insns_.Get(i);
      DCHECK_EQ(p->opcode, kX86Mov32RI);
      uint32_t target_method_idx = p->operands[2];

      // The offset to patch is the last 4 bytes of the instruction.
      int patch_offset = p->offset + p->flags.size - 4;
      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                          cu_->method_idx, target_method_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
      LIR* p = call_method_insns_.Get(i);
      DCHECK_EQ(p->opcode, kX86CallI);
      uint32_t target_method_idx = p->operands[1];
      const DexFile* target_dex_file =
          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

      // The offset to patch is the last 4 bytes of the instruction.
      int patch_offset = p->offset + p->flags.size - 4;
      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                                 cu_->method_idx, cu_->invoke_type,
                                                 target_method_idx, target_dex_file,
                                                 static_cast<InvokeType>(p->operands[3]),
                                                 patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast String.indexOf(I) and String.indexOf(II) intrinsics.  Inline the simple
 * case of char <= 0xFFFF; otherwise bail to the standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers.

  // EAX: 16 bit character being searched.
  // ECX: count: number of words to be searched.
  // EDI: String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.

  uint32_t char_value =
    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-NULL?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // Character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search, placing it in rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR *length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use the special instruction JECXZ.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLoc(rl_start);
      if (rl_start.location == kLocPhysReg) {
        // Handle the "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // The special case: we will use EDI further, so let's put the start
          // index on the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from the stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Put the start index on the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from the stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
  NewLIR0(kX86RepneScasw);
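  // Note: repne scasw advances EDI past each compared char, so on a match EDI
  // points one char beyond it; the index computation below subtracts 1 to
  // compensate.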

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched.  Compute the index of the result:
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR *all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR *not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out of line code returns here.
  if (slowpath_branch != nullptr) {
    LIR *return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Append a 32 bit quantity to the FDE buffer.
 * @param buf FDE buffer.
 * @param data Data value.
 */
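// Bytes are appended least-significant first, i.e. little-endian, matching the
// x86 byte order assumed for this DWARF data.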
static void PushWord(std::vector<uint8_t>& buf, int data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

/*
 * @brief Append an 'advance LOC' instruction to the FDE buffer.
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
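// The opcodes below follow the DWARF call frame encoding: DW_CFA_advance_loc
// packs a delta < 64 into the opcode byte itself (high two bits 01), while
// DW_CFA_advance_loc1/2/4 (0x02, 0x03, 0x04) carry 1-, 2- and 4-byte deltas.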
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta.
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}


std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
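  // (0x7C is the single-byte SLEB128 encoding of -4.)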
  cfi_info->push_back(0x7C);

  // Return address register (R8).
  cfi_info->push_back(0x08);

  // Initial CFA is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 NOPs to align to a 4 byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

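// Append the ULEB128 encoding of |value| to |buf|.  For example, 624485
// encodes to the three bytes 0xE5 0x8E 0x26.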
static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t *p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // The stack bump was followed by a RET instruction.
      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Pad to a multiple of 4 bytes.
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes.
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

}  // namespace art