target_x86.cc revision 05d3aeb33683b16837741f9348d6fba9a8432068
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
#endif
};
static const RegStorage sp_regs_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_regs_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};
static const RegStorage reserved_regs_arr[] = {rs_rX86_SP};
static const RegStorage core_temps_arr[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage sp_temps_arr[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_temps_arr[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};

static const std::vector<RegStorage> core_regs(core_regs_arr,
    core_regs_arr + arraysize(core_regs_arr));
static const std::vector<RegStorage> sp_regs(sp_regs_arr,
    sp_regs_arr + arraysize(sp_regs_arr));
static const std::vector<RegStorage> dp_regs(dp_regs_arr,
    dp_regs_arr + arraysize(dp_regs_arr));
static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
    reserved_regs_arr + arraysize(reserved_regs_arr));
static const std::vector<RegStorage> core_temps(core_temps_arr,
    core_temps_arr + arraysize(core_temps_arr));
static const std::vector<RegStorage> sp_temps(sp_temps_arr,
    sp_temps_arr + arraysize(sp_temps_arr));
static const std::vector<RegStorage> dp_temps(dp_temps_arr,
    dp_temps_arr + arraysize(dp_temps_arr));

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on the new internal ABI.
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}
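
// Reading of the mapping above (an inference from this switch, not a separate
// ABI document): rX86_ARG0 is taken by the implicit method pointer, so Dalvik
// argument 0 lands in rX86_ARG1, argument 1 in rX86_ARG2, argument 2 in
// rX86_ARG3, and InvalidReg means "passed on the stack".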

/*
 * Decode the register id.
 */
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  uint64_t seed;
  int shift;
  int reg_id;

  reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register. */
  seed = 1;
  /* FP registers start at bit position 16. */
  shift = reg.IsFloat() ? kX86FPReg0 : 0;
  /* Expand the register id into a single bit offset. */
  shift += reg_id;
  return (seed << shift);
}
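
// Worked example (assuming kX86FPReg0 == 16, per the comment above): core
// register rDX has reg_id 2 and encodes as 1 << 2, while xmm3 encodes as
// 1 << (16 + 3), so core and FP registers occupy disjoint bits of the
// 64-bit use/def masks.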

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
   * able to clean up some of the x86/Arm_Mips differences.
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fixup for a hard-to-describe instruction: kX86RepneScasw uses rAX, rCX and rDI,
  // and sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}
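
// Illustration of how the flag bits compose (the opcode choice here is
// hypothetical, but the flags are the ones handled above): a 32-bit divide,
// which computes EDX:EAX / r/m32, would carry
// REG_USEA | REG_USED | REG_DEFA | REG_DEFD, recording both input halves as
// uses and the quotient (EAX) and remainder (EDX) as defs.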

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a display string for the instruction.
 * See the format key in Assemble.cc.
 */
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}
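
// Hypothetical illustration of the format key: with fmt "cmp !0r,!1d" and
// operands {rs_rCX, 42}, the loop above produces "cmp rcx,42"; a "!!"
// sequence emits a literal '!'. The real format strings live in the
// encoding map (see Assemble.cc).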

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // x86 has no link register; instead, account for the fake return address
  // register so the frame reserves a slot for the return address.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.  On Arm this must track holes
 * in the contiguous vpush/vpop register lists (associating holes with Dalvik
 * register INVALID_VREG, 0xFFFFU); x86 has no equivalent mechanism, so these
 * hooks are unimplemented here.
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg8bM:
    case kX86LockCmpxchg8bA:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide a full barrier.
      return true;
    default:
      break;
  }

  // Be conservative if we cannot prove that the instruction provides a full barrier.
  return false;
}

void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off by using the last LIR as the barrier. If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  /*
   * According to the JSR-133 Cookbook, on x86 only StoreLoad barriers need a memory
   * fence. All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the
   * x86 memory model. For those cases, all we need to ensure is that there is a
   * scheduling barrier in place.
   */
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
      mem_barrier = NewLIR0(kX86Mfence);
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
#endif
}
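
// Concrete effect of the logic above: if the preceding LIR was, e.g.,
// kX86LockCmpxchgMR, a kStoreLoad barrier emits no extra instruction because
// the lock-prefixed operation already acts as a full barrier; after a plain
// mov store, the same request appends an mfence. In both cases the chosen
// LIR is marked with def_mask = ENCODE_ALL so the scheduler cannot move
// memory operations across it.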

// Alloc a pair of core registers, or a double.
RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempDouble();
  }
  RegStorage low_reg = AllocTemp();
  RegStorage high_reg = AllocTemp();
  return RegStorage::MakeRegPair(low_reg, high_reg);
}

RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempSingle();
  }
  return AllocTemp();
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
                                        core_temps, sp_temps, dp_temps);

  // Target-specific adjustments.

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // 64-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
    // Redirect 32-bit vector's master storage to 64-bit vector.
    info->SetMaster(dp_reg_info);
  }

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask, not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}
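
// Illustrative walk-through (frame values assumed, not taken from real
// output): with frame_size_ == 32 and a spill mask covering rBP (bit 5),
// rSI (bit 6), and the fake return address, num_core_spills_ == 3, so the
// loop stores EBP at [ESP + 20] and ESI at [ESP + 24], filling the spill
// area at the top of the frame in ascending register-number order.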

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask, not including the fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (4 * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += 4;
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  if (UNLIKELY(is_volatile)) {
    // On x86, an atomic 64-bit load/store requires an fp register.
    // Smaller aligned loads/stores are atomic for both core and fp registers.
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr) {
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
            << " is wrong: expecting " << i << ", seeing "
            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena);
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    LIR* store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}
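
// Example of the in-memory fast path above (displacement assumed, and
// LOWORD_OFFSET/HIWORD_OFFSET taken to be 0/4): storing the wide constant
// 0x123456789ABCDEF0 to a frame slot at displacement 16 emits
//   mov [ESP + 16], 0x9ABCDEF0   ; low word
//   mov [ESP + 20], 0x12345678   ; high word
// instead of materializing the constant in a register pair first.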

// TODO: Merge with the existing RegLocation dumper in vreg_analysis.cc.
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
            << (loc.wide ? " w" : "  ")
            << (loc.defined ? " D" : "  ")
            << (loc.is_const ? " c" : "  ")
            << (loc.fp ? " F" : "  ")
            << (loc.core ? " C" : "  ")
            << (loc.ref ? " r" : "  ")
            << (loc.high_word ? " h" : "  ")
            << (loc.home ? " H" : "  ")
            << ", low: " << static_cast<int>(loc.reg.GetLowReg())
            << ", high: " << static_cast<int>(loc.reg.GetHighReg())
            << ", s_reg: " << loc.s_reg_low
            << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32-bit move-immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure
   * that code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32-bit move-immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure
   * that code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save index and type.
  LIR* move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR* X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32-bit relative call instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on the target to ensure
   * that code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR* call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
    LIR* p = method_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                         cu_->method_idx, cu_->invoke_type,
                                         target_method_idx, target_dex_file,
                                         static_cast<InvokeType>(p->operands[4]),
                                         patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
    LIR* p = class_type_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_type_idx = p->operands[2];  // This is a type index, not a method index.

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                        cu_->method_idx, target_type_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
    LIR* p = call_method_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86CallI);
    uint32_t target_method_idx = p->operands[1];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                               cu_->method_idx, cu_->invoke_type,
                                               target_method_idx, target_dex_file,
                                               static_cast<InvokeType>(p->operands[3]),
                                               patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast String.indexOf(I) & (II).  Inlines the simple case of char <= 0xFFFF;
 * otherwise bails out to the standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers.

  // EAX: the 16-bit character being searched for.
  // ECX: count: the number of 16-bit code units to search.
  // EDI: the String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the (II) flavor of indexOf.

  uint32_t char_value =
    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-NULL?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // The character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within the data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within the array.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search in to rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR* length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use the special JECXZ instruction,
    // which branches iff ECX is zero.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLocTyped(rl_start, kCoreReg);
      if (rl_start.location == kLocPhysReg) {
        // Handle the "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // Special case: we will use EDI later, so push the start index onto the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from the stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Push the start index onto the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from the stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched.  Compute the index of the result.
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR* all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR* not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out-of-line code returns here.
  if (slowpath_branch != nullptr) {
    LIR* return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Enter a 32-bit quantity into the FDE buffer.
 * @param buf FDE buffer.
 * @param data Data value.
 */
static void PushWord(std::vector<uint8_t>& buf, int data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}
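
// Example: PushWord(buf, 0x12345678) appends the bytes 0x78, 0x56, 0x34,
// 0x12, i.e. the value in the target's little-endian byte order.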

/*
 * @brief Enter an 'advance LOC' into the FDE buffer.
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta.
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}
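
// Examples of the encodings above: AdvanceLoc(buf, 3) emits the single byte
// 0x43 (DW_CFA_advance_loc with the delta packed into the low six bits),
// while AdvanceLoc(buf, 100) emits 0x02 0x64 (DW_CFA_advance_loc1 followed
// by a one-byte delta).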

std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
  cfi_info->push_back(0x7C);

  // Return address register (R8).
  cfi_info->push_back(0x08);

  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 Noops to align to a 4 byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t* ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t* p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}
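
// Worked ULEB128 example (the classic one from the DWARF spec): encoding the
// value 624485 appends 0xE5, 0x8E, 0x26: seven payload bits per byte, least
// significant group first, with the high bit set on every byte except the
// last.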

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // The stack bump was followed by a RET instruction.
      LIR* post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Pad to a multiple of 4 bytes.
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes.
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

}  // namespace art