target_x86.cc revision 2f244e9faccfcca68af3c5484c397a01a1c3a342
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
#endif
};
static const RegStorage sp_regs_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_regs_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};
static const RegStorage reserved_regs_arr[] = {rs_rX86_SP};
static const RegStorage core_temps_arr[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage sp_temps_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_temps_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};

static const std::vector<RegStorage> core_regs(core_regs_arr,
    core_regs_arr + arraysize(core_regs_arr));
static const std::vector<RegStorage> sp_regs(sp_regs_arr,
    sp_regs_arr + arraysize(sp_regs_arr));
static const std::vector<RegStorage> dp_regs(dp_regs_arr,
    dp_regs_arr + arraysize(dp_regs_arr));
static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
    reserved_regs_arr + arraysize(reserved_regs_arr));
static const std::vector<RegStorage> core_temps(core_temps_arr,
    core_temps_arr + arraysize(core_temps_arr));
static const std::vector<RegStorage> sp_temps(sp_temps_arr,
    sp_temps_arr + arraysize(sp_temps_arr));
static const std::vector<RegStorage> dp_temps(dp_temps_arr,
    dp_temps_arr + arraysize(dp_temps_arr));

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on new internal ABI.
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}

/*
 * Decode the register id into a resource bit mask.
 */
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  int reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register. */
  uint64_t seed = 1;
  /* FP registers start at bit position 16. */
  int shift = reg.IsFloat() ? kX86FPReg0 : 0;
  /* Expand the register id into a single-bit offset. */
  shift += reg_id;
  return (seed << shift);
}
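
// Worked example of the mapping above: rDX is a core register with reg num 2,
// so GetRegMaskCommon(rs_rDX) yields 1ULL << 2. fr3 is a float register with
// reg num 3, so it yields 1ULL << (kX86FPReg0 + 3) -- bit 19, given that the
// FP resource bits start at position 16 as noted in the comment above.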

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
   * able to clean up some of the x86/ARM/MIPS differences.
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fixup for a hard-to-describe instruction: uses rAX, rCX, rDI; sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}
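
// For instance, an encoding that carries REG_DEFA | REG_USEC (a hypothetical
// opcode writing rAX and reading rCX) ends up with rAX in def_mask and rCX in
// use_mask, on top of whatever the generic Mir2Lir setup already recorded.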

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a display string for the instruction.
 * See the format key in assemble_x86.cc.
 */
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData* tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}
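
// Example expansion, assuming a format such as "!0r,!1d" (see the key in
// assemble_x86.cc): with operands[0] naming rAX and operands[1] == 42, the
// result is "rax,42". A literal '!' is written as "!!" in the format string.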

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // x86 has no link register to spill; instead, account for the fake return
  // address register so the frame layout matches the unwind information.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK_EQ(res.reg.GetLowReg(), rs_rAX.GetReg());
  DCHECK_EQ(res.reg.GetHighReg(), rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg8bM:
    case kX86LockCmpxchg8bA:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide a full barrier.
      return true;
    default:
      break;
  }

  // Be conservative: if we cannot prove an instruction provides a full
  // barrier, assume it does not.
  return false;
}

void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  /*
   * According to the JSR-133 Cookbook, on x86 only StoreLoad barriers need a memory
   * fence. All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the
   * x86 memory model. For those cases, all we need to ensure is that there is a
   * scheduling barrier in place.
   */
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
      mem_barrier = NewLIR0(kX86Mfence);
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
#endif
}
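
// Example: a kStoreLoad barrier requested right after a plain mov store emits
// an mfence (the mov is not a full barrier) and marks it as a scheduling
// barrier; requested right after kX86LockCmpxchgMR, it reuses that
// instruction and only widens its def_mask to ENCODE_ALL.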

// Alloc a pair of core registers, or a double.
RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempDouble();
  }
  RegStorage low_reg = AllocTemp();
  RegStorage high_reg = AllocTemp();
  return RegStorage::MakeRegPair(low_reg, high_reg);
}

RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempSingle();
  }
  return AllocTemp();
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
                                        core_temps, sp_temps, dp_temps);

  // Target-specific adjustments.

  // Alias single-precision xmm registers to the double-precision xmm registers.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // The 64-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
    // Redirect the 32-bit vector's master storage to the 64-bit vector.
    info->SetMaster(dp_reg_info);
  }

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}
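
// After this aliasing fixup, the single-precision and double-precision views
// of the same xmm register share one master, e.g.:
//   GetRegInfo(rs_fr0)->Master() == GetRegInfo(rs_dr0)
// so allocating dr0 as a temp conflicts with fr0 and vice versa.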

void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
  DCHECK(rl_keep.wide);
  DCHECK(rl_free.wide);
  int free_low = rl_free.reg.GetLowReg();
  int free_high = rl_free.reg.GetHighReg();
  int keep_low = rl_keep.reg.GetLowReg();
  int keep_high = rl_keep.reg.GetHighReg();
  if ((free_low != keep_low) && (free_low != keep_high) &&
      (free_high != keep_low) && (free_high != keep_high)) {
    // No overlap, free both.
    FreeTemp(rl_free.reg);
  }
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}
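
// Worked example: with frame_size_ == 64 and num_core_spills_ == 2 (one real
// register, say rBP, plus the fake return address register added by
// AdjustSpillMask()), the masked loop stores only EBP, at [ESP + 56]
// (64 - 4 * 2).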

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  if (UNLIKELY(is_volatile)) {
    // On x86, atomic 64-bit load/store requires an fp register.
    // Smaller aligned load/store is atomic for both core and fp registers.
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr) {
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
            << " is wrong: expecting " << i << ", seeing "
            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena);
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    LIR* store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}
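
// Example of the fast path above: GenConstWide(rl_dest, 0x123456789ABCDEF0)
// with rl_dest in the Dalvik frame emits two immediate stores,
//   mov [ESP + disp], 0x9ABCDEF0
//   mov [ESP + disp + 4], 0x12345678
// rather than materializing the constant in a register pair first.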

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
            << (loc.wide ? " w" : "  ")
            << (loc.defined ? " D" : "  ")
            << (loc.is_const ? " c" : "  ")
            << (loc.fp ? " F" : "  ")
            << (loc.core ? " C" : "  ")
            << (loc.ref ? " r" : "  ")
            << (loc.high_word ? " h" : "  ")
            << (loc.home ? " H" : "  ")
            << ", low: " << static_cast<int>(loc.reg.GetLowReg())
            << ", high: " << static_cast<int>(loc.reg.GetHighReg())
            << ", s_reg: " << loc.s_reg_low
            << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'. For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'. For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save the index and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR* X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32 bit call relative instruction that will be filled
   * in at 'link time'. For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR* call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
    LIR* p = method_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                         cu_->method_idx, cu_->invoke_type,
                                         target_method_idx, target_dex_file,
                                         static_cast<InvokeType>(p->operands[4]),
                                         patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
    LIR* p = class_type_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                        cu_->method_idx, target_method_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
    LIR* p = call_method_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86CallI);
    uint32_t target_method_idx = p->operands[1];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                               cu_->method_idx, cu_->invoke_type,
                                               target_method_idx, target_dex_file,
                                               static_cast<InvokeType>(p->operands[3]),
                                               patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast String.indexOf(I) and (II) intrinsics. Inline check for the simple case of
 * char <= 0xFFFF; otherwise bail to the standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers.

  // EAX: the 16-bit character being searched for.
  // ECX: count, the number of words to be searched.
  // EDI: the String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the (III) flavor of indexOf.

  uint32_t char_value =
    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-null?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // The character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of the reference to the data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of the count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within the data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of the char data within the array.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // The character is in EAX.
  // The object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search into rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR* length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string; use the special JECXZ instruction.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLocTyped(rl_start, kCoreReg);
      if (rl_start.location == kLocPhysReg) {
        // Handle the "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // Special case: we will use EDI later, so push the start index onto the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from the stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Push the start index onto the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string data into EBX.
  // The data starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from the stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched. Compute the index of the result:
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR* all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR* not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out-of-line code returns here.
  if (slowpath_branch != nullptr) {
    LIR* return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Append a 32-bit quantity to the FDE buffer, little-endian.
 * @param buf FDE buffer.
 * @param data Data value.
 */
static void PushWord(std::vector<uint8_t>& buf, int data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}
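
// For example, PushWord(buf, 0x12345678) appends the little-endian bytes
// 0x78, 0x56, 0x34, 0x12 -- the byte order DWARF data takes on x86.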

/*
 * @brief Append a DW_CFA_advance_loc instruction to the FDE buffer.
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Delta encoded in the opcode itself (DW_CFA_advance_loc).
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta (DW_CFA_advance_loc1).
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta (DW_CFA_advance_loc2).
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta (DW_CFA_advance_loc4).
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}
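
// Encoding examples: AdvanceLoc(buf, 10) emits the single opcode byte 0x4a
// (DW_CFA_advance_loc with the delta in its low 6 bits); AdvanceLoc(buf, 300)
// emits 0x03 0x2c 0x01 (DW_CFA_advance_loc2 with a little-endian delta).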

std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
  cfi_info->push_back(0x7C);

  // Return address register (R8).
  cfi_info->push_back(0x08);

  // Initial return PC is at 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP), i.e. CFA - 4: DW_CFA_offset R8 1 (1 * -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 nops to align to a 4-byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}
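
// Byte-for-byte, the CIE built above is:
//   10 00 00 00   length (16)
//   ff ff ff ff   CIE id
//   03            version
//   00            augmentation ("")
//   01            code alignment factor
//   7c            data alignment factor (-4 as SLEB128)
//   08            return address register (R8)
//   0c 04 04      DW_CFA_def_cfa: ESP (r4) + 4
//   88 01         DW_CFA_offset: r8 saved at CFA - 4 (1 * -4)
//   00 00         DW_CFA_nop padding to a 4-byte boundary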

static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t* ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t* p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}
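
// ULEB128 example: EncodeUnsignedLeb128(buf, 300) appends 0xac 0x02 --
// 300 is 0b1'0010'1100, so the low seven bits (0x2c) go out first with the
// continuation bit set, followed by the remaining bits (0x02).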

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // 'CIE_pointer' (can be filled in by the linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by the linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4. There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // The stack bump was followed by a RET instruction.
      LIR* post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Pad to a multiple of 4 bytes.
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes.
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}
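
// Sketch of the resulting FDE byte stream (lengths and deltas depend on
// frame_size_ and code layout, so this is illustrative, not fixed output):
//   <length> <CIE_pointer=0> <initial_location=0> <address_range=data_offset_>
//   DW_CFA_advance_loc       past the stack decrement
//   0x0e <ULEB frame_size_>  DW_CFA_def_cfa_offset frame_size_
//   DW_CFA_advance_loc       to just past the stack increment
//   0x0a                     DW_CFA_remember_state
//   0x0e 0x04                DW_CFA_def_cfa_offset 4
//   DW_CFA_advance_loc       past the RET
//   0x0b                     DW_CFA_restore_state
//   0x00 ...                 DW_CFA_nop padding to a 4-byte multiple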

}  // namespace art