target_x86.cc revision 674744e635ddbdfb311fbd25b5a27356560d30c3
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
#endif
};
static const RegStorage sp_regs_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_regs_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};
static const RegStorage reserved_regs_arr[] = {rs_rX86_SP};
static const RegStorage core_temps_arr[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage sp_temps_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_temps_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};

static const std::vector<RegStorage> core_regs(core_regs_arr,
    core_regs_arr + arraysize(core_regs_arr));
static const std::vector<RegStorage> sp_regs(sp_regs_arr,
    sp_regs_arr + arraysize(sp_regs_arr));
static const std::vector<RegStorage> dp_regs(dp_regs_arr,
    dp_regs_arr + arraysize(dp_regs_arr));
static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
    reserved_regs_arr + arraysize(reserved_regs_arr));
static const std::vector<RegStorage> core_temps(core_temps_arr,
    core_temps_arr + arraysize(core_temps_arr));
static const std::vector<RegStorage> sp_temps(sp_temps_arr,
    sp_temps_arr + arraysize(sp_temps_arr));
static const std::vector<RegStorage> dp_temps(dp_temps_arr,
    dp_temps_arr + arraysize(dp_temps_arr));

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on new internal ABI.
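  // Editor's note (assumption, not confirmed by this file): rX86_ARG0 is taken
  // to carry the ArtMethod* itself under this internal ABI, which would be why
  // Dalvik argument 0 maps to rX86_ARG1 below.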
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}

/*
 * Decode the register id.
 */
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  uint64_t seed;
  int shift;
  int reg_id;

  reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register */
  seed = 1;
  /* FP register starts at bit position 16 */
  shift = reg.IsFloat() ? kX86FPReg0 : 0;
  /* Expand the double register id into single offset */
  shift += reg_id;
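  // Illustrative example: for xmm5, shift = kX86FPReg0 + 5 = 16 + 5, so the
  // mask returned below is (1ULL << 21).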
  return (seed << shift);
}

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
   * able to clean up some of the x86/Arm_Mips differences.
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fix up a hard-to-describe instruction: kX86RepneScasw uses rAX, rCX and rDI, and sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a human-readable instruction string.
 * See the format key in Assemble.cc.
 */
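// For illustration (hypothetical format and operands): a fmt of "!0r,!1r" with
// operands[0] = rAX and operands[1] = rCX would render as "rax,rcx", since
// '!Nr' selects LIR operand N and prints it as a register name.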
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // x86 has no link register to spill; instead, account for the fake return
  // address register that CALL pushes on the stack.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.  The ARM backend's note about
 * vpush/vpop needing contiguous register lists does not apply here; these
 * hooks are unimplemented on x86.
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg8bM:
    case kX86LockCmpxchg8bA:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide full barrier.
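      // (xchg with a memory operand needs no explicit prefix: it asserts LOCK
      // implicitly on x86.)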
      return true;
    default:
      break;
  }

  // Be conservative if we cannot prove that a full barrier is provided.
  return false;
}

void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with the last LIR as the barrier.  If it is not sufficient, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  /*
   * According to the JSR-133 Cookbook, on x86 only StoreLoad barriers need a memory fence.
   * All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory
   * model.  For those cases, all we need to ensure is that there is a scheduling barrier
   * in place.
   */
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
      mem_barrier = NewLIR0(kX86Mfence);
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
#endif
}

// Alloc a pair of core registers, or a double.
RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempDouble();
  }
  RegStorage low_reg = AllocTemp();
  RegStorage high_reg = AllocTemp();
  return RegStorage::MakeRegPair(low_reg, high_reg);
}

RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempSingle();
  }
  return AllocTemp();
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
                                        core_temps, sp_temps, dp_temps);

  // Target-specific adjustments.

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // 64-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
    // Redirect 32-bit vector's master storage to 64-bit vector.
    info->SetMaster(dp_reg_info);
  }

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
  DCHECK(rl_keep.wide);
  DCHECK(rl_free.wide);
  int free_low = rl_free.reg.GetLowReg();
  int free_high = rl_free.reg.GetHighReg();
  int keep_low = rl_keep.reg.GetLowReg();
  int keep_high = rl_keep.reg.GetHighReg();
  if ((free_low != keep_low) && (free_low != keep_high) &&
      (free_high != keep_low) && (free_high != keep_high)) {
    // No overlap, free both.
    FreeTemp(rl_free.reg);
  }
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask, not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
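  // Worked example (illustrative): with frame_size_ == 32 and num_core_spills_
  // == 2 (one real register plus the fake return address counted by
  // AdjustSpillMask()), the register is stored at [ESP + 24].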
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask, not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  if (UNLIKELY(is_volatile)) {
    // On x86, atomic 64-bit load/store requires an fp register.
    // Smaller aligned load/store is atomic for both core and fp registers.
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr) {
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
                   << " is wrong: expecting " << i << ", seeing "
                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena);
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

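    // Illustrative: for value 0x1122334455667788LL, val_lo is 0x55667788 and
    // val_hi is 0x11223344; each half is written with its own 32-bit store.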
    LIR* store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
            << (loc.wide ? " w" : "  ")
            << (loc.defined ? " D" : "  ")
            << (loc.is_const ? " c" : "  ")
            << (loc.fp ? " F" : "  ")
            << (loc.core ? " C" : "  ")
            << (loc.ref ? " r" : "  ")
            << (loc.high_word ? " h" : "  ")
            << (loc.home ? " H" : "  ")
            << ", low: " << static_cast<int>(loc.reg.GetLowReg())
            << ", high: " << static_cast<int>(loc.reg.GetHighReg())
            << ", s_reg: " << loc.s_reg_low
            << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32-bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32-bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save index and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR* X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32-bit call relative instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR* call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
    LIR* p = method_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

    // The offset to patch is the last 4 bytes of the instruction.
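    // Illustrative: a 5-byte 'mov r32, imm32' at offset 0x20 has its 32-bit
    // immediate patched starting at 0x21 (0x20 + 5 - 4).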
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                         cu_->method_idx, cu_->invoke_type,
                                         target_method_idx, target_dex_file,
                                         static_cast<InvokeType>(p->operands[4]),
                                         patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
    LIR* p = class_type_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_type_idx = p->operands[2];

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                        cu_->method_idx, target_type_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
    LIR* p = call_method_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86CallI);
    uint32_t target_method_idx = p->operands[1];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                               cu_->method_idx, cu_->invoke_type,
                                               target_method_idx, target_dex_file,
                                               static_cast<InvokeType>(p->operands[3]),
                                               patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast String.indexOf(I) & (II).  Inline check for the simple case of char <= 0xFFFF;
 * otherwise bails to the standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers.

  // EAX: 16-bit character being searched.
  // ECX: count: number of words to be searched.
  // EDI: String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.

  uint32_t char_value =
    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-NULL?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // Character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search in to rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR* length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use the special JECXZ instruction.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLoc(rl_start);
      if (rl_start.location == kLocPhysReg) {
        // Handle the "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // The special case: we will use EDI further, so let's put the start index on the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from the stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Put the start index on the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from the stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
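  // repne scasw compares AX against the word at [EDI], advancing EDI by two
  // and decrementing ECX on each iteration, until a match is found or ECX
  // reaches zero.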
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched.  Compute the index of the result.
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR* all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR* not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out-of-line code returns here.
  if (slowpath_branch != nullptr) {
    LIR* return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Enter a 32-bit quantity into the FDE buffer, little-endian.
 * @param buf FDE buffer.
 * @param data Data value.
 */
static void PushWord(std::vector<uint8_t>& buf, int data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

/*
 * @brief Enter an 'advance LOC' into the FDE buffer.
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode: DW_CFA_advance_loc.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta: DW_CFA_advance_loc1.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta: DW_CFA_advance_loc2.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta: DW_CFA_advance_loc4.
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}
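
// Encoding examples (illustrative): an increment of 10 emits the single byte
// 0x4a (DW_CFA_advance_loc | 10); an increment of 300 emits {0x03, 0x2c, 0x01}.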

std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4 (SLEB128 0x7C).
  cfi_info->push_back(0x7C);

  // Return address register: R8.
  cfi_info->push_back(0x08);

  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (factored by data alignment -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 NOPs to align to a 4-byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t* ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t* p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}
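
// LEB128 examples (illustrative): 100 encodes as the single byte 0x64, while
// 300 encodes as {0xac, 0x02} (low seven bits 0x2c with the continuation bit
// set, then 0x02).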

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // The stack bump was followed by a RET instruction.
      LIR* post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Pad to a multiple of 4 bytes.
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes (little-endian).
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

}  // namespace art