target_x86.cc revision e87f9b5185379c8cf8392d65a63e7bf7e51b97e7
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr_32[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
};
static const RegStorage core_regs_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
#endif
};
static const RegStorage sp_regs_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static const RegStorage sp_regs_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_regs_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static const RegStorage dp_regs_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};
static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage core_temps_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11
#endif
};
static const RegStorage sp_temps_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static const RegStorage sp_temps_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_temps_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static const RegStorage dp_temps_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};

static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));

RegStorage rs_rX86_SP;

X86NativeRegisterPool rX86_ARG0;
X86NativeRegisterPool rX86_ARG1;
X86NativeRegisterPool rX86_ARG2;
X86NativeRegisterPool rX86_ARG3;
X86NativeRegisterPool rX86_FARG0;
X86NativeRegisterPool rX86_FARG1;
X86NativeRegisterPool rX86_FARG2;
X86NativeRegisterPool rX86_FARG3;
X86NativeRegisterPool rX86_RET0;
X86NativeRegisterPool rX86_RET1;
X86NativeRegisterPool rX86_INVOKE_TGT;
X86NativeRegisterPool rX86_COUNT;

RegStorage rs_rX86_ARG0;
RegStorage rs_rX86_ARG1;
RegStorage rs_rX86_ARG2;
RegStorage rs_rX86_ARG3;
RegStorage rs_rX86_FARG0;
RegStorage rs_rX86_FARG1;
RegStorage rs_rX86_FARG2;
RegStorage rs_rX86_FARG3;
RegStorage rs_rX86_RET0;
RegStorage rs_rX86_RET1;
RegStorage rs_rX86_INVOKE_TGT;
RegStorage rs_rX86_COUNT;

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on new internal ABI.
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}

/*
 * Decode the register id.
 */
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  uint64_t seed;
  int shift;
  int reg_id;

  reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register */
  seed = 1;
  /* FP register starts at bit position 16 */
  shift = reg.IsFloat() ? kX86FPReg0 : 0;
  /* Expand the double register id into single offset */
  shift += reg_id;
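  // For example, assuming kX86FPReg0 is 16 as noted above, xmm2 maps to bit 18 of the mask,
  // while a core register such as EDX (reg_id 2) maps to bit 2.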
  return (seed << shift);
}

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
   * able to clean up some of the x86/Arm_Mips differences
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }
  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a string no longer than size
 * See format key in Assemble.cc.
 */
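// Illustrative (hypothetical) example: a format such as "!0r,!1d" prints operand 0 as a
// register name and operand 1 as a decimal immediate, while "!!" emits a literal '!'.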
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // Adjustment for LR spilling: x86 has no LR, so mark the fake return address register
  // as spilled instead.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.  Note that
 * vpush/vpop uses contiguous register lists so we must
 * include any holes in the mask.  Associate holes with
 * Dalvik register INVALID_VREG (0xFFFFU).
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

RegStorage X86Mir2Lir::AllocateByteRegister() {
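  // Note: on 32-bit x86 only EAX/EBX/ECX/EDX have byte-addressable forms; the 32-bit core
  // temp pool above happens to contain exactly those registers, so a plain core temp works.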
  return AllocTypedTemp(false, kCoreReg);
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
    switch (opcode) {
      case kX86LockCmpxchgMR:
      case kX86LockCmpxchgAR:
      case kX86LockCmpxchg8bM:
      case kX86LockCmpxchg8bA:
      case kX86XchgMR:
      case kX86Mfence:
        // Atomic memory instructions provide full barrier.
        return true;
      default:
        break;
    }

    // Be conservative if we cannot prove that the instruction provides a full barrier.
    return false;
}

bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  bool ret = false;
  /*
   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need a memory fence. All other barriers
   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
   * to ensure is that there is a scheduling barrier in place.
   */
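  // For example, if the last emitted LIR is a kX86LockCmpxchgMR, it already provides the
  // required StoreLoad semantics (see ProvidesFullMemoryBarrier) and no mfence is emitted.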
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
  return ret;
#else
  return false;
#endif
}

// Alloc a pair of core registers, or a double.
RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempDouble();
  }
  RegStorage low_reg = AllocTemp();
  RegStorage high_reg = AllocTemp();
  return RegStorage::MakeRegPair(low_reg, high_reg);
}

RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
    return AllocTempSingle();
  }
  return AllocTemp();
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  if (Gen64Bit()) {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, sp_regs_64, dp_regs_64, reserved_regs_64,
                                        core_temps_64, sp_temps_64, dp_temps_64);
  } else {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, sp_regs_32, dp_regs_32, reserved_regs_32,
                                        core_temps_32, sp_temps_32, dp_temps_32);
  }

  // Target-specific adjustments.

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // 64-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
    // Redirect 32-bit vector's master storage to 64-bit vector.
    info->SetMaster(dp_reg_info);
  }

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
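  // For example, on x86-32 with frame_size_ == 64 and two spilled registers, the first one
  // is stored at offset 64 - 2 * 4 = 56 and the second at 60.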
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  if (UNLIKELY(is_volatile)) {
    // On x86, atomic 64-bit load/store requires an fp register.
    // Smaller aligned load/store is atomic for both core and fp registers.
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
      const_vectors_(nullptr) {
  store_method_addr_used_ = false;
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
                   << " is wrong: expecting " << i << ", seeing "
                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
  if (Gen64Bit()) {
    rs_rX86_SP = rs_rX86_SP_64;

    rs_rX86_ARG0 = rs_rDI;
    rs_rX86_ARG1 = rs_rSI;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rCX;
    rX86_ARG0 = rDI;
    rX86_ARG1 = rSI;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rCX;
    // TODO: ARG4(r8), ARG5(r9), floating point args.
  } else {
    rs_rX86_SP = rs_rX86_SP_32;

    rs_rX86_ARG0 = rs_rAX;
    rs_rX86_ARG1 = rs_rCX;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rBX;
    rX86_ARG0 = rAX;
    rX86_ARG1 = rCX;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rBX;
  }
  rs_rX86_FARG0 = rs_rAX;
  rs_rX86_FARG1 = rs_rCX;
  rs_rX86_FARG2 = rs_rDX;
  rs_rX86_FARG3 = rs_rBX;
  rs_rX86_RET0 = rs_rAX;
  rs_rX86_RET1 = rs_rDX;
  rs_rX86_INVOKE_TGT = rs_rAX;
  rs_rX86_COUNT = rs_rCX;
  rX86_FARG0 = rAX;
  rX86_FARG1 = rCX;
  rX86_FARG2 = rDX;
  rX86_FARG3 = rBX;
  rX86_RET0 = rAX;
  rX86_RET1 = rDX;
  rX86_INVOKE_TGT = rAX;
  rX86_COUNT = rCX;
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena, false);
}

Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena, true);
}

// Not used in x86
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

// Not used in x86
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO)  << "location: " << loc.location << ','
             << (loc.wide ? " w" : "  ")
             << (loc.defined ? " D" : "  ")
             << (loc.is_const ? " c" : "  ")
             << (loc.fp ? " F" : "  ")
             << (loc.core ? " C" : "  ")
             << (loc.ref ? " r" : "  ")
             << (loc.high_word ? " h" : "  ")
             << (loc.home ? " H" : "  ")
             << ", low: " << static_cast<int>(loc.reg.GetLowReg())
             << ", high: " << static_cast<int>(loc.reg.GetHighReg())
             << ", s_reg: " << loc.s_reg_low
             << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on target to ensure that
   * code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save index and type.
  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32 bit call relative instruction that will be filled
   * in at 'link time'.  For now, put a unique value based on target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

/*
 * @brief Enter a 32 bit quantity into a buffer
 * @param buf buffer.
 * @param data Data value.
 */

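// The value is emitted little-endian; e.g. 0x12345678 is pushed as the bytes 78 56 34 12.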
static void PushWord(std::vector<uint8_t>&buf, int32_t data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Align to 16 byte boundary.  We have implicit knowledge that the start of the method is
  // on a 4 byte boundary.  How can I check this if it changes (other than that aligned loads
  // will fail at runtime)?
  if (const_vectors_ != nullptr) {
    int align_size = (16-4) - (code_buffer_.size() & 0xF);
    if (align_size < 0) {
      align_size += 16;
    }
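    // For example, if code_buffer_.size() is currently 0x...8, align_size is 12 - 8 = 4 bytes of
    // padding, leaving the vector data at an offset congruent to 12 mod 16; the (16-4) presumably
    // accounts for the 4-byte-aligned method start noted above.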

    while (align_size > 0) {
      code_buffer_.push_back(0);
      align_size--;
    }
    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
      PushWord(code_buffer_, p->operands[0]);
      PushWord(code_buffer_, p->operands[1]);
      PushWord(code_buffer_, p->operands[2]);
      PushWord(code_buffer_, p->operands[3]);
    }
  }

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
      LIR* p = method_address_insns_.Get(i);
      DCHECK_EQ(p->opcode, kX86Mov32RI);
      uint32_t target_method_idx = p->operands[2];
      const DexFile* target_dex_file =
          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

      // The offset to patch is the last 4 bytes of the instruction.
      int patch_offset = p->offset + p->flags.size - 4;
      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                           cu_->method_idx, cu_->invoke_type,
                                           target_method_idx, target_dex_file,
                                           static_cast<InvokeType>(p->operands[4]),
                                           patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
      LIR* p = class_type_address_insns_.Get(i);
      DCHECK_EQ(p->opcode, kX86Mov32RI);
      uint32_t target_method_idx = p->operands[2];

      // The offset to patch is the last 4 bytes of the instruction.
      int patch_offset = p->offset + p->flags.size - 4;
      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                          cu_->method_idx, target_method_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
      LIR* p = call_method_insns_.Get(i);
      DCHECK_EQ(p->opcode, kX86CallI);
      uint32_t target_method_idx = p->operands[1];
      const DexFile* target_dex_file =
          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

      // The offset to patch is the last 4 bytes of the instruction.
      int patch_offset = p->offset + p->flags.size - 4;
      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                                 cu_->method_idx, cu_->invoke_type,
                                                 target_method_idx, target_dex_file,
                                                 static_cast<InvokeType>(p->operands[3]),
                                                 patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast string.index_of(I) & (II).  Inline check for simple case of char <= 0xffff,
 * otherwise bails to standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers

  // EAX: 16 bit character being searched.
  // ECX: count: number of words to be searched.
  // EDI: String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.
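  // The search itself is a single REPNE SCASW over the char array; on a match, EDI points one
  // past the matching character, from which the result index is recomputed below.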

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.

  uint32_t char_value =
    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-NULL?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // Character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search in to rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR *length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use special instruction JECXZ.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLocTyped(rl_start, kCoreReg);
      if (rl_start.location == kLocPhysReg) {
        // Handle "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // The special case: we will use EDI further, so let's put the start index on the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Put the start index on the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched.  Compute the index of the result.
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR *all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR *not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out of line code returns here.
  if (slowpath_branch != nullptr) {
    LIR *return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Enter an 'advance LOC' into the FDE buffer
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
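// These are the standard DWARF DW_CFA_advance_loc encodings: e.g. an increment of 10 encodes
// as the single byte 0x4A (0x40 | 10), while 100 encodes as the two bytes 0x02 0x64.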
static void AdvanceLoc(std::vector<uint8_t>&buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta.
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}


std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
  cfi_info->push_back(0x7C);
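  // (0x7C is -4 encoded as a single signed LEB128 byte.)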

  // Return address register (R8).
  cfi_info->push_back(0x08);

  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 Noops to align to 4 byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

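// Unsigned LEB128: 7 bits per byte, low-order group first, with the high bit of each byte
// acting as a continuation flag; e.g. 624485 encodes as the bytes 0xE5 0x8E 0x26.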
static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t *p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // Stack bump was followed by RET instruction.
      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Padding to a multiple of 4
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes.
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
    case kMirOpConstVector:
      GenConst128(bb, mir);
      break;
    default:
      break;
  }
}

void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
  int type_size = mir->dalvikInsn.vA;
  // We support 128 bit vectors.
  DCHECK_EQ(type_size & 0xFFFF, 128);
  int reg = mir->dalvikInsn.vB;
  DCHECK_LT(reg, 8);
  uint32_t *args = mir->dalvikInsn.arg;
  // Check for all 0 case.
  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
    NewLIR2(kX86XorpsRR, reg, reg);
    return;
  }
  // Okay, load it from the constant vector area.
  LIR *data_target = ScanVectorLiteral(mir);
  if (data_target == nullptr) {
    data_target = AddVectorLiteral(mir);
  }

  // Address the start of the method.
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  rl_method = LoadValue(rl_method, kCoreReg);

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // a 4 byte offset.  We will fix this up in the assembler later to have the right
  // value.
  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */);
  load->flags.fixup = kFixupLoad;
  load->target = data_target;
  SetMemRefType(load, true, kLiteral);
}

LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
        args[2] == p->operands[2] && args[3] == p->operands[3]) {
      return p;
    }
  }
  return nullptr;
}

LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
  new_value->operands[0] = args[0];
  new_value->operands[1] = args[1];
  new_value->operands[2] = args[2];
  new_value->operands[3] = args[3];
  new_value->next = const_vectors_;
  if (const_vectors_ == nullptr) {
    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
  }
  estimated_native_code_size_ += 16;  // Space for one vector.
  const_vectors_ = new_value;
  return new_value;
}

}  // namespace art