target_x86.cc revision b14329f90f725af0f67c45dfcb94933a426d63ce
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>

#include <string>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15,
#endif
};
static const RegStorage sp_regs_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15,
#endif
};
static const RegStorage dp_regs_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15,
#endif
};
static const RegStorage reserved_regs_arr[] = {rs_rX86_SP};
static const RegStorage core_temps_arr[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage sp_temps_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15,
#endif
};
static const RegStorage dp_temps_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15,
#endif
};

static const std::vector<RegStorage> core_regs(core_regs_arr,
    core_regs_arr + arraysize(core_regs_arr));
static const std::vector<RegStorage> sp_regs(sp_regs_arr,
    sp_regs_arr + arraysize(sp_regs_arr));
static const std::vector<RegStorage> dp_regs(dp_regs_arr,
    dp_regs_arr + arraysize(dp_regs_arr));
static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
    reserved_regs_arr + arraysize(reserved_regs_arr));
static const std::vector<RegStorage> core_temps(core_temps_arr,
    core_temps_arr + arraysize(core_temps_arr));
static const std::vector<RegStorage> sp_temps(sp_temps_arr,
    sp_temps_arr + arraysize(sp_temps_arr));
static const std::vector<RegStorage> dp_temps(dp_temps_arr,
    dp_temps_arr + arraysize(dp_temps_arr));
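
// These vectors are copies of the C arrays above, built via the iterator-pair
// constructor; they seed the RegisterPool created in CompilerInitializeRegAlloc().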

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on new internal ABI.
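  // Note: arg_num 0 maps to ARG1 rather than ARG0; ARG0 presumably carries the
  // method pointer in the internal ABI, so it is not available for Dalvik arguments.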
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}

/*
 * Decode the register id.
 */
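// Layout of the use/def mask: core register numbers occupy the low bits, and FP
// registers start at bit kX86FPReg0 (16). For example, rDX (reg_num 2) maps to
// bit 2, while xmm2 maps to bit (kX86FPReg0 + 2).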
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  int reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register. */
  uint64_t seed = 1;
  /* FP registers start at bit position kX86FPReg0 (16). */
  int shift = reg.IsFloat() ? kX86FPReg0 : 0;
  /* Expand the register id into a single bit offset. */
  shift += reg_id;
  return (seed << shift);
}

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
   * able to clean up some of the x86/ARM/MIPS differences.
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fixup for a hard-to-describe instruction: kX86RepneScasw uses rAX, rCX and rDI, and sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a display string.
 * See the format key in Assemble.cc.
 */
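// Example (illustrative fragment, not an actual entry): a fmt string of "!0r,!1d"
// prints operand 0 as a register name ('r') and operand 1 as a decimal
// immediate ('d'); "!!" emits a literal '!'. The real fmt strings come from
// X86Mir2Lir::EncodingMap (see GetTargetInstFmt below).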
std::string X86Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}

void X86Mir2Lir::DumpResourceMask(LIR* x86LIR, uint64_t mask, const char* prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // x86 has no link register to spill, but we still account for the fake return
  // address register so that the frame layout includes the return-address slot.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.  Note that
 * vpush/vpop uses contiguous register lists so we must
 * include any holes in the mask.  Associate holes with
 * Dalvik register INVALID_VREG (0xFFFFU).
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

/* Clobber all regs that might be used by an external C call */
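// Note: rBX is callee-save in the native ABI, but this backend allocates it as a
// temp (see core_temps_arr above), so it must be clobbered across calls as well.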
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK_EQ(res.reg.GetLowReg(), rs_rAX.GetReg());
  DCHECK_EQ(res.reg.GetHighReg(), rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg8bM:
    case kX86LockCmpxchg8bA:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide a full barrier.
      return true;
    default:
      break;
  }

  // Be conservative if we cannot prove that an instruction provides a full barrier.
  return false;
}

bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off using the last LIR as the barrier. If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  bool ret = false;
  /*
   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need a memory fence.
   * All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model.
   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   */
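  // On x86, lock-prefixed read-modify-write instructions (and xchg with memory,
  // which has an implicit lock) already act as full barriers, so a preceding
  // atomic LIR can stand in for the mfence (see ProvidesFullMemoryBarrier above).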
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
  return ret;
#else
  return false;
#endif
}

// Alloc a pair of core registers, or a double.
RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempDouble();
  }
  RegStorage low_reg = AllocTemp();
  RegStorage high_reg = AllocTemp();
  return RegStorage::MakeRegPair(low_reg, high_reg);
}

RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempSingle();
  }
  return AllocTemp();
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
                                        core_temps, sp_temps, dp_temps);

  // Target-specific adjustments.

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // 64-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
    // Redirect 32-bit vector's master storage to 64-bit vector.
    info->SetMaster(dp_reg_info);
  }

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
  DCHECK(rl_keep.wide);
  DCHECK(rl_free.wide);
  int free_low = rl_free.reg.GetLowReg();
  int free_high = rl_free.reg.GetHighReg();
  int keep_low = rl_keep.reg.GetLowReg();
  int keep_high = rl_keep.reg.GetHighReg();
  if ((free_low != keep_low) && (free_low != keep_high) &&
      (free_high != keep_low) && (free_high != keep_high)) {
    // No overlap, free both.
    FreeTemp(rl_free.reg);
  }
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
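  // Walk the mask from the low bit upward, storing each spilled register in the
  // next word slot; e.g. a mask of 0x60 (EBP | ESI) stores EBP at offset and
  // ESI at offset + 4.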
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  if (UNLIKELY(is_volatile)) {
    // On x86, atomic 64-bit load/store requires an fp register.
    // Smaller aligned load/store is atomic for both core and fp registers.
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr) {
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
            << " is wrong: expecting " << i << ", seeing "
            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena);
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

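    // Note: the >> 2 on the displacement below converts the byte offset into the
    // word index that AnnotateDalvikRegAccess appears to expect.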
    LIR* store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
            << (loc.wide ? " w" : "  ")
            << (loc.defined ? " D" : "  ")
            << (loc.is_const ? " c" : "  ")
            << (loc.fp ? " F" : "  ")
            << (loc.core ? " C" : "  ")
            << (loc.ref ? " r" : "  ")
            << (loc.high_word ? " h" : "  ")
            << (loc.home ? " H" : "  ")
            << ", low: " << static_cast<int>(loc.reg.GetLowReg())
            << ", high: " << static_cast<int>(loc.reg.GetHighReg())
            << ", s_reg: " << loc.s_reg_low
            << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32-bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32-bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save index and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR* X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32-bit call relative instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR* call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
    LIR* p = method_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                         cu_->method_idx, cu_->invoke_type,
                                         target_method_idx, target_dex_file,
                                         static_cast<InvokeType>(p->operands[4]),
                                         patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
    LIR* p = class_type_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                        cu_->method_idx, target_method_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
    LIR* p = call_method_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86CallI);
    uint32_t target_method_idx = p->operands[1];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                               cu_->method_idx, cu_->invoke_type,
                                               target_method_idx, target_dex_file,
                                               static_cast<InvokeType>(p->operands[3]),
                                               patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast string.index_of(I) & (II).  Inline check for simple case of char <= 0xFFFF,
 * otherwise bails to standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers.

  // EAX: 16 bit character being searched.
  // ECX: count: number of words to be searched.
  // EDI: String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.

  uint32_t char_value =
    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-NULL?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // Character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within the data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within the array.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search into rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR* length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use the special instruction JECXZ.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLocTyped(rl_start, kCoreReg);
      if (rl_start.location == kLocPhysReg) {
        // Handle the "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // The special case: we will use EDI further, so let's put the start index on the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from the stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Put the start index on the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from the stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
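  // REPNE SCASW compares AX against the word at [EDI], advancing EDI by 2 and
  // decrementing ECX on each step; it stops on a match or when ECX reaches zero.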
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched.  Compute the index of the result.
  // index = ((curr_ptr - orig_ptr) / 2) - 1, since EDI ends one word past the match.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR* all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR* not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out of line code returns here.
  if (slowpath_branch != nullptr) {
    LIR* return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Enter a 32-bit quantity into the FDE buffer
 * @param buf FDE buffer.
 * @param data Data value.
 */
static void PushWord(std::vector<uint8_t>& buf, int data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

/*
 * @brief Enter an 'advance LOC' into the FDE buffer
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
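// DWARF encodes location advances compactly: deltas below 64 are packed into the
// opcode byte itself (DW_CFA_advance_loc, high bits 0x1), while larger deltas use
// the DW_CFA_advance_loc1/2/4 forms (opcodes 0x02/0x03/0x04) followed by a 1-, 2-
// or 4-byte little-endian operand.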
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta.
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}

std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
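  // (0x7C is the one-byte signed LEB128 encoding of -4.)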
  cfi_info->push_back(0x7C);

  // Return address register (R8).
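  // (Register 8 is the conventional DWARF return-address column for x86.)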
  cfi_info->push_back(0x08);

  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 NOPs to align to a 4 byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

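// Append the ULEB128 encoding of |value| to |buf| via the pointer-based
// EncodeUnsignedLeb128 overload (presumably declared in ART's leb128 header).
// A 32-bit value needs at most 5 bytes, so the 12-byte scratch buffer is ample.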
static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t* ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t* p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // The stack bump was followed by a RET instruction.
      LIR* post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Pad to a multiple of 4 bytes.
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes (little-endian,
  // excluding the 4-byte length field itself).
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

}  // namespace art