target_x86.cc revision d65c51a556e6649db4e18bd083c8fec37607a442
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <string>
18#include <inttypes.h>
19
20#include "codegen_x86.h"
21#include "dex/compiler_internals.h"
22#include "dex/quick/mir_to_lir-inl.h"
23#include "mirror/array.h"
24#include "mirror/string.h"
25#include "x86_lir.h"
26
27namespace art {
28
29static const RegStorage core_regs_arr_32[] = {
30    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
31};
32static const RegStorage core_regs_arr_64[] = {
33    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
34#ifdef TARGET_REX_SUPPORT
35    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
36#endif
37};
38static const RegStorage sp_regs_arr_32[] = {
39    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
40};
41static const RegStorage sp_regs_arr_64[] = {
42    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
43#ifdef TARGET_REX_SUPPORT
44    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
45#endif
46};
47static const RegStorage dp_regs_arr_32[] = {
48    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
49};
50static const RegStorage dp_regs_arr_64[] = {
51    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
52#ifdef TARGET_REX_SUPPORT
53    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
54#endif
55};
56static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
57static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
58static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
59static const RegStorage core_temps_arr_64[] = {
60    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
61#ifdef TARGET_REX_SUPPORT
62    rs_r8, rs_r9, rs_r10, rs_r11
63#endif
64};
65static const RegStorage sp_temps_arr_32[] = {
66    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
67};
68static const RegStorage sp_temps_arr_64[] = {
69    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
70#ifdef TARGET_REX_SUPPORT
71    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
72#endif
73};
74static const RegStorage dp_temps_arr_32[] = {
75    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
76};
77static const RegStorage dp_temps_arr_64[] = {
78    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
79#ifdef TARGET_REX_SUPPORT
80    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
81#endif
82};
83
84static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
85    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
86static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
87    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
88static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
89    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
90static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
91    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
92static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
93    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
94static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
95    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
96static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
97    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
98static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
99    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
100static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
101    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
102static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
103    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
104static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
105    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
106static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
107    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
108static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
109    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
110static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
111    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
112
113RegStorage rs_rX86_SP;
114
115X86NativeRegisterPool rX86_ARG0;
116X86NativeRegisterPool rX86_ARG1;
117X86NativeRegisterPool rX86_ARG2;
118X86NativeRegisterPool rX86_ARG3;
119X86NativeRegisterPool rX86_FARG0;
120X86NativeRegisterPool rX86_FARG1;
121X86NativeRegisterPool rX86_FARG2;
122X86NativeRegisterPool rX86_FARG3;
123X86NativeRegisterPool rX86_RET0;
124X86NativeRegisterPool rX86_RET1;
125X86NativeRegisterPool rX86_INVOKE_TGT;
126X86NativeRegisterPool rX86_COUNT;
127
128RegStorage rs_rX86_ARG0;
129RegStorage rs_rX86_ARG1;
130RegStorage rs_rX86_ARG2;
131RegStorage rs_rX86_ARG3;
132RegStorage rs_rX86_FARG0;
133RegStorage rs_rX86_FARG1;
134RegStorage rs_rX86_FARG2;
135RegStorage rs_rX86_FARG3;
136RegStorage rs_rX86_RET0;
137RegStorage rs_rX86_RET1;
138RegStorage rs_rX86_INVOKE_TGT;
139RegStorage rs_rX86_COUNT;
140
141RegLocation X86Mir2Lir::LocCReturn() {
142  return x86_loc_c_return;
143}
144
145RegLocation X86Mir2Lir::LocCReturnWide() {
146  return x86_loc_c_return_wide;
147}
148
149RegLocation X86Mir2Lir::LocCReturnFloat() {
150  return x86_loc_c_return_float;
151}
152
153RegLocation X86Mir2Lir::LocCReturnDouble() {
154  return x86_loc_c_return_double;
155}
156
157// Return a target-dependent special register.
158RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
159  RegStorage res_reg = RegStorage::InvalidReg();
160  switch (reg) {
161    case kSelf: res_reg = RegStorage::InvalidReg(); break;
162    case kSuspend: res_reg =  RegStorage::InvalidReg(); break;
163    case kLr: res_reg =  RegStorage::InvalidReg(); break;
164    case kPc: res_reg =  RegStorage::InvalidReg(); break;
165    case kSp: res_reg =  rs_rX86_SP; break;
166    case kArg0: res_reg = rs_rX86_ARG0; break;
167    case kArg1: res_reg = rs_rX86_ARG1; break;
168    case kArg2: res_reg = rs_rX86_ARG2; break;
169    case kArg3: res_reg = rs_rX86_ARG3; break;
170    case kFArg0: res_reg = rs_rX86_FARG0; break;
171    case kFArg1: res_reg = rs_rX86_FARG1; break;
172    case kFArg2: res_reg = rs_rX86_FARG2; break;
173    case kFArg3: res_reg = rs_rX86_FARG3; break;
174    case kRet0: res_reg = rs_rX86_RET0; break;
175    case kRet1: res_reg = rs_rX86_RET1; break;
176    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
177    case kHiddenArg: res_reg = rs_rAX; break;
178    case kHiddenFpArg: res_reg = rs_fr0; break;
179    case kCount: res_reg = rs_rX86_COUNT; break;
180  }
181  return res_reg;
182}
183
184RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
185  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
186  // TODO: This is not 64-bit compliant and depends on new internal ABI.
187  switch (arg_num) {
188    case 0:
189      return rs_rX86_ARG1;
190    case 1:
191      return rs_rX86_ARG2;
192    case 2:
193      return rs_rX86_ARG3;
194    default:
195      return RegStorage::InvalidReg();
196  }
197}
198
199/*
200 * Decode the register id.
201 */
202uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
203  uint64_t seed;
204  int shift;
205  int reg_id;
206
207  reg_id = reg.GetRegNum();
208  /* Double registers in x86 are just a single FP register */
209  seed = 1;
210  /* FP register starts at bit position 16 */
211  shift = reg.IsFloat() ? kX86FPReg0 : 0;
212  /* Expand the double register id into single offset */
213  shift += reg_id;
214  return (seed << shift);
215}
216
217uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
218  /*
219   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
220   * able to clean up some of the x86/Arm_Mips differences
221   */
222  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
223  return 0ULL;
224}
225
226void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
227  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
228  DCHECK(!lir->flags.use_def_invalid);
229
230  // X86-specific resource map setup here.
231  if (flags & REG_USE_SP) {
232    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
233  }
234
235  if (flags & REG_DEF_SP) {
236    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
237  }
238
239  if (flags & REG_DEFA) {
240    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
241  }
242
243  if (flags & REG_DEFD) {
244    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
245  }
246  if (flags & REG_USEA) {
247    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
248  }
249
250  if (flags & REG_USEC) {
251    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
252  }
253
254  if (flags & REG_USED) {
255    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
256  }
257
258  if (flags & REG_USEB) {
259    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
260  }
261
262  // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
263  if (lir->opcode == kX86RepneScasw) {
264    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
265    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
266    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
267    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
268  }
269
270  if (flags & USE_FP_STACK) {
271    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
272    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
273  }
274}
275
/* For dumping instructions: core register names, indexed by register number. */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx",
  "rsp", "rbp", "rsi", "rdi",
  "r8",  "r9",  "r10", "r11",
  "r12", "r13", "r14", "r15"
};
281
/* For dumping instructions: condition names, indexed by the condition operand value. */
static const char* x86CondName[] = {
  "O",     "NO",    "B/NAE/C", "NB/AE/NC",  // 0 - 3
  "Z/EQ",  "NZ/NE", "BE/NA",   "NBE/A",     // 4 - 7
  "S",     "NS",    "P/PE",    "NP/PO",     // 8 - 11
  "L/NGE", "NL/GE", "LE/NG",   "NLE/G"      // 12 - 15
};
300
301/*
302 * Interpret a format string and build a string no longer than size
303 * See format key in Assemble.cc.
304 */
305std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
306  std::string buf;
307  size_t i = 0;
308  size_t fmt_len = strlen(fmt);
309  while (i < fmt_len) {
310    if (fmt[i] != '!') {
311      buf += fmt[i];
312      i++;
313    } else {
314      i++;
315      DCHECK_LT(i, fmt_len);
316      char operand_number_ch = fmt[i];
317      i++;
318      if (operand_number_ch == '!') {
319        buf += "!";
320      } else {
321        int operand_number = operand_number_ch - '0';
322        DCHECK_LT(operand_number, 6);  // Expect upto 6 LIR operands.
323        DCHECK_LT(i, fmt_len);
324        int operand = lir->operands[operand_number];
325        switch (fmt[i]) {
326          case 'c':
327            DCHECK_LT(static_cast<size_t>(operand), sizeof(x86CondName));
328            buf += x86CondName[operand];
329            break;
330          case 'd':
331            buf += StringPrintf("%d", operand);
332            break;
333          case 'p': {
334            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
335            buf += StringPrintf("0x%08x", tab_rec->offset);
336            break;
337          }
338          case 'r':
339            if (RegStorage::IsFloat(operand)) {
340              int fp_reg = RegStorage::RegNum(operand);
341              buf += StringPrintf("xmm%d", fp_reg);
342            } else {
343              int reg_num = RegStorage::RegNum(operand);
344              DCHECK_LT(static_cast<size_t>(reg_num), sizeof(x86RegName));
345              buf += x86RegName[reg_num];
346            }
347            break;
348          case 't':
349            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
350                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
351                                lir->target);
352            break;
353          default:
354            buf += StringPrintf("DecodeError '%c'", fmt[i]);
355            break;
356        }
357        i++;
358      }
359    }
360  }
361  return buf;
362}
363
364void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
365  char buf[256];
366  buf[0] = 0;
367
368  if (mask == ENCODE_ALL) {
369    strcpy(buf, "all");
370  } else {
371    char num[8];
372    int i;
373
374    for (i = 0; i < kX86RegEnd; i++) {
375      if (mask & (1ULL << i)) {
376        snprintf(num, arraysize(num), "%d ", i);
377        strcat(buf, num);
378      }
379    }
380
381    if (mask & ENCODE_CCODE) {
382      strcat(buf, "cc ");
383    }
384    /* Memory bits */
385    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
386      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
387               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
388               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
389    }
390    if (mask & ENCODE_LITERAL) {
391      strcat(buf, "lit ");
392    }
393
394    if (mask & ENCODE_HEAP_REF) {
395      strcat(buf, "heap ");
396    }
397    if (mask & ENCODE_MUST_NOT_ALIAS) {
398      strcat(buf, "noalias ");
399    }
400  }
401  if (buf[0]) {
402    LOG(INFO) << prefix << ": " <<  buf;
403  }
404}
405
406void X86Mir2Lir::AdjustSpillMask() {
407  // Adjustment for LR spilling, x86 has no LR so nothing to do here
408  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
409  num_core_spills_++;
410}
411
412/*
413 * Mark a callee-save fp register as promoted.  Note that
414 * vpush/vpop uses contiguous register lists so we must
415 * include any holes in the mask.  Associate holes with
416 * Dalvik register INVALID_VREG (0xFFFFU).
417 */
418void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
419  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
420}
421
422void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
423  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
424}
425
426/* Clobber all regs that might be used by an external C call */
427void X86Mir2Lir::ClobberCallerSave() {
428  Clobber(rs_rAX);
429  Clobber(rs_rCX);
430  Clobber(rs_rDX);
431  Clobber(rs_rBX);
432}
433
434RegLocation X86Mir2Lir::GetReturnWideAlt() {
435  RegLocation res = LocCReturnWide();
436  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
437  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
438  Clobber(rs_rAX);
439  Clobber(rs_rDX);
440  MarkInUse(rs_rAX);
441  MarkInUse(rs_rDX);
442  MarkWide(res.reg);
443  return res;
444}
445
446RegLocation X86Mir2Lir::GetReturnAlt() {
447  RegLocation res = LocCReturn();
448  res.reg.SetReg(rs_rDX.GetReg());
449  Clobber(rs_rDX);
450  MarkInUse(rs_rDX);
451  return res;
452}
453
454/* To be used when explicitly managing register use */
455void X86Mir2Lir::LockCallTemps() {
456  LockTemp(rs_rX86_ARG0);
457  LockTemp(rs_rX86_ARG1);
458  LockTemp(rs_rX86_ARG2);
459  LockTemp(rs_rX86_ARG3);
460}
461
462/* To be used when explicitly managing register use */
463void X86Mir2Lir::FreeCallTemps() {
464  FreeTemp(rs_rX86_ARG0);
465  FreeTemp(rs_rX86_ARG1);
466  FreeTemp(rs_rX86_ARG2);
467  FreeTemp(rs_rX86_ARG3);
468}
469
470bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
471    switch (opcode) {
472      case kX86LockCmpxchgMR:
473      case kX86LockCmpxchgAR:
474      case kX86LockCmpxchg8bM:
475      case kX86LockCmpxchg8bA:
476      case kX86XchgMR:
477      case kX86Mfence:
478        // Atomic memory instructions provide full barrier.
479        return true;
480      default:
481        break;
482    }
483
484    // Conservative if cannot prove it provides full barrier.
485    return false;
486}
487
488bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
489#if ANDROID_SMP != 0
490  // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
491  LIR* mem_barrier = last_lir_insn_;
492
493  bool ret = false;
494  /*
495   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
496   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
497   * to ensure is that there is a scheduling barrier in place.
498   */
499  if (barrier_kind == kStoreLoad) {
500    // If no LIR exists already that can be used a barrier, then generate an mfence.
501    if (mem_barrier == nullptr) {
502      mem_barrier = NewLIR0(kX86Mfence);
503      ret = true;
504    }
505
506    // If last instruction does not provide full barrier, then insert an mfence.
507    if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
508      mem_barrier = NewLIR0(kX86Mfence);
509      ret = true;
510    }
511  }
512
513  // Now ensure that a scheduling barrier is in place.
514  if (mem_barrier == nullptr) {
515    GenBarrier();
516  } else {
517    // Mark as a scheduling barrier.
518    DCHECK(!mem_barrier->flags.use_def_invalid);
519    mem_barrier->u.m.def_mask = ENCODE_ALL;
520  }
521  return ret;
522#else
523  return false;
524#endif
525}
526
527// Alloc a pair of core registers, or a double.
528RegStorage X86Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
529  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
530    return AllocTempDouble();
531  }
532  RegStorage low_reg = AllocTemp();
533  RegStorage high_reg = AllocTemp();
534  return RegStorage::MakeRegPair(low_reg, high_reg);
535}
536
537RegStorage X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
538  if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
539    return AllocTempSingle();
540  }
541  return AllocTemp();
542}
543
544void X86Mir2Lir::CompilerInitializeRegAlloc() {
545  if (Gen64Bit()) {
546    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, sp_regs_64, dp_regs_64, reserved_regs_64,
547                                        core_temps_64, sp_temps_64, dp_temps_64);
548  } else {
549    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, sp_regs_32, dp_regs_32, reserved_regs_32,
550                                        core_temps_32, sp_temps_32, dp_temps_32);
551  }
552
553  // Target-specific adjustments.
554
555  // Alias single precision xmm to double xmms.
556  // TODO: as needed, add larger vector sizes - alias all to the largest.
557  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
558  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
559    int sp_reg_num = info->GetReg().GetRegNum();
560    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
561    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
562    // 64-bit xmm vector register's master storage should refer to itself.
563    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
564    // Redirect 32-bit vector's master storage to 64-bit vector.
565    info->SetMaster(dp_reg_info);
566  }
567
568  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
569  // TODO: adjust for x86/hard float calling convention.
570  reg_pool_->next_core_reg_ = 2;
571  reg_pool_->next_sp_reg_ = 2;
572  reg_pool_->next_dp_reg_ = 1;
573}
574
575void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
576  DCHECK(rl_keep.wide);
577  DCHECK(rl_free.wide);
578  int free_low = rl_free.reg.GetLowReg();
579  int free_high = rl_free.reg.GetHighReg();
580  int keep_low = rl_keep.reg.GetLowReg();
581  int keep_high = rl_keep.reg.GetHighReg();
582  if ((free_low != keep_low) && (free_low != keep_high) &&
583      (free_high != keep_low) && (free_high != keep_high)) {
584    // No overlap, free both
585    FreeTemp(rl_free.reg);
586  }
587}
588
589void X86Mir2Lir::SpillCoreRegs() {
590  if (num_core_spills_ == 0) {
591    return;
592  }
593  // Spill mask not including fake return address register
594  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
595  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
596  for (int reg = 0; mask; mask >>= 1, reg++) {
597    if (mask & 0x1) {
598      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
599      offset += GetInstructionSetPointerSize(cu_->instruction_set);
600    }
601  }
602}
603
604void X86Mir2Lir::UnSpillCoreRegs() {
605  if (num_core_spills_ == 0) {
606    return;
607  }
608  // Spill mask not including fake return address register
609  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
610  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
611  for (int reg = 0; mask; mask >>= 1, reg++) {
612    if (mask & 0x1) {
613      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
614      offset += GetInstructionSetPointerSize(cu_->instruction_set);
615    }
616  }
617}
618
619bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
620  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
621}
622
623bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
624  return true;
625}
626
627RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
628  if (UNLIKELY(is_volatile)) {
629    // On x86, atomic 64-bit load/store requires an fp register.
630    // Smaller aligned load/store is atomic for both core and fp registers.
631    if (size == k64 || size == kDouble) {
632      return kFPReg;
633    }
634  }
635  return RegClassBySize(size);
636}
637
638X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
639    : Mir2Lir(cu, mir_graph, arena),
640      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
641      method_address_insns_(arena, 100, kGrowableArrayMisc),
642      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
643      call_method_insns_(arena, 100, kGrowableArrayMisc),
644      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
645      const_vectors_(nullptr) {
646  store_method_addr_used_ = false;
647  if (kIsDebugBuild) {
648    for (int i = 0; i < kX86Last; i++) {
649      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
650        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
651                   << " is wrong: expecting " << i << ", seeing "
652                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
653      }
654    }
655  }
656  if (Gen64Bit()) {
657    rs_rX86_SP = rs_rX86_SP_64;
658
659    rs_rX86_ARG0 = rs_rDI;
660    rs_rX86_ARG1 = rs_rSI;
661    rs_rX86_ARG2 = rs_rDX;
662    rs_rX86_ARG3 = rs_rCX;
663    rX86_ARG0 = rDI;
664    rX86_ARG1 = rSI;
665    rX86_ARG2 = rDX;
666    rX86_ARG3 = rCX;
667    // TODO: ARG4(r8), ARG5(r9), floating point args.
668  } else {
669    rs_rX86_SP = rs_rX86_SP_32;
670
671    rs_rX86_ARG0 = rs_rAX;
672    rs_rX86_ARG1 = rs_rCX;
673    rs_rX86_ARG2 = rs_rDX;
674    rs_rX86_ARG3 = rs_rBX;
675    rX86_ARG0 = rAX;
676    rX86_ARG1 = rCX;
677    rX86_ARG2 = rDX;
678    rX86_ARG3 = rBX;
679  }
680  rs_rX86_FARG0 = rs_rAX;
681  rs_rX86_FARG1 = rs_rCX;
682  rs_rX86_FARG2 = rs_rDX;
683  rs_rX86_FARG3 = rs_rBX;
684  rs_rX86_RET0 = rs_rAX;
685  rs_rX86_RET1 = rs_rDX;
686  rs_rX86_INVOKE_TGT = rs_rAX;
687  rs_rX86_COUNT = rs_rCX;
688  rX86_FARG0 = rAX;
689  rX86_FARG1 = rCX;
690  rX86_FARG2 = rDX;
691  rX86_FARG3 = rBX;
692  rX86_RET0 = rAX;
693  rX86_RET1 = rDX;
694  rX86_INVOKE_TGT = rAX;
695  rX86_COUNT = rCX;
696}
697
698Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
699                          ArenaAllocator* const arena) {
700  return new X86Mir2Lir(cu, mir_graph, arena, false);
701}
702
703Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
704                          ArenaAllocator* const arena) {
705  return new X86Mir2Lir(cu, mir_graph, arena, true);
706}
707
708// Not used in x86
709RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
710  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
711  return RegStorage::InvalidReg();
712}
713
714// Not used in x86
715RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
716  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
717  return RegStorage::InvalidReg();
718}
719
720LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
721  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
722  return nullptr;
723}
724
725uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
726  DCHECK(!IsPseudoLirOp(opcode));
727  return X86Mir2Lir::EncodingMap[opcode].flags;
728}
729
730const char* X86Mir2Lir::GetTargetInstName(int opcode) {
731  DCHECK(!IsPseudoLirOp(opcode));
732  return X86Mir2Lir::EncodingMap[opcode].name;
733}
734
735const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
736  DCHECK(!IsPseudoLirOp(opcode));
737  return X86Mir2Lir::EncodingMap[opcode].fmt;
738}
739
740void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
741  // Can we do this directly to memory?
742  rl_dest = UpdateLocWide(rl_dest);
743  if ((rl_dest.location == kLocDalvikFrame) ||
744      (rl_dest.location == kLocCompilerTemp)) {
745    int32_t val_lo = Low32Bits(value);
746    int32_t val_hi = High32Bits(value);
747    int r_base = TargetReg(kSp).GetReg();
748    int displacement = SRegOffset(rl_dest.s_reg_low);
749
750    LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
751    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
752                              false /* is_load */, true /* is64bit */);
753    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
754    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
755                              false /* is_load */, true /* is64bit */);
756    return;
757  }
758
759  // Just use the standard code to do the generation.
760  Mir2Lir::GenConstWide(rl_dest, value);
761}
762
763// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
764void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
765  LOG(INFO)  << "location: " << loc.location << ','
766             << (loc.wide ? " w" : "  ")
767             << (loc.defined ? " D" : "  ")
768             << (loc.is_const ? " c" : "  ")
769             << (loc.fp ? " F" : "  ")
770             << (loc.core ? " C" : "  ")
771             << (loc.ref ? " r" : "  ")
772             << (loc.high_word ? " h" : "  ")
773             << (loc.home ? " H" : "  ")
774             << ", low: " << static_cast<int>(loc.reg.GetLowReg())
775             << ", high: " << static_cast<int>(loc.reg.GetHighReg())
776             << ", s_reg: " << loc.s_reg_low
777             << ", orig: " << loc.orig_sreg;
778}
779
780void X86Mir2Lir::Materialize() {
781  // A good place to put the analysis before starting.
782  AnalyzeMIR();
783
784  // Now continue with regular code generation.
785  Mir2Lir::Materialize();
786}
787
788void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
789                                   SpecialTargetRegister symbolic_reg) {
790  /*
791   * For x86, just generate a 32 bit move immediate instruction, that will be filled
792   * in at 'link time'.  For now, put a unique value based on target to ensure that
793   * code deduplication works.
794   */
795  int target_method_idx = target_method.dex_method_index;
796  const DexFile* target_dex_file = target_method.dex_file;
797  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
798  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
799
800  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
801  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
802                     static_cast<int>(target_method_id_ptr), target_method_idx,
803                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
804  AppendLIR(move);
805  method_address_insns_.Insert(move);
806}
807
808void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
809  /*
810   * For x86, just generate a 32 bit move immediate instruction, that will be filled
811   * in at 'link time'.  For now, put a unique value based on target to ensure that
812   * code deduplication works.
813   */
814  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
815  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
816
817  // Generate the move instruction with the unique pointer and save index and type.
818  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
819                     static_cast<int>(ptr), type_idx);
820  AppendLIR(move);
821  class_type_address_insns_.Insert(move);
822}
823
824LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
825  /*
826   * For x86, just generate a 32 bit call relative instruction, that will be filled
827   * in at 'link time'.  For now, put a unique value based on target to ensure that
828   * code deduplication works.
829   */
830  int target_method_idx = target_method.dex_method_index;
831  const DexFile* target_dex_file = target_method.dex_file;
832  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
833  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
834
835  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
836  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
837                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
838  AppendLIR(call);
839  call_method_insns_.Insert(call);
840  return call;
841}
842
/*
 * @brief Append a 32 bit quantity to a byte buffer, least significant byte first.
 * @param buf buffer.
 * @param data Data value.
 */
static void PushWord(std::vector<uint8_t>&buf, int32_t data) {
  // Emit bytes 0..3 of the value in little-endian order.
  for (int shift = 0; shift < 32; shift += 8) {
    buf.push_back((data >> shift) & 0xff);
  }
}
855
856void X86Mir2Lir::InstallLiteralPools() {
857  // These are handled differently for x86.
858  DCHECK(code_literal_list_ == nullptr);
859  DCHECK(method_literal_list_ == nullptr);
860  DCHECK(class_literal_list_ == nullptr);
861
862  // Align to 16 byte boundary.  We have implicit knowledge that the start of the method is
863  // on a 4 byte boundary.   How can I check this if it changes (other than aligned loads
864  // will fail at runtime)?
865  if (const_vectors_ != nullptr) {
866    int align_size = (16-4) - (code_buffer_.size() & 0xF);
867    if (align_size < 0) {
868      align_size += 16;
869    }
870
871    while (align_size > 0) {
872      code_buffer_.push_back(0);
873      align_size--;
874    }
875    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
876      PushWord(code_buffer_, p->operands[0]);
877      PushWord(code_buffer_, p->operands[1]);
878      PushWord(code_buffer_, p->operands[2]);
879      PushWord(code_buffer_, p->operands[3]);
880    }
881  }
882
883  // Handle the fixups for methods.
884  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
885      LIR* p = method_address_insns_.Get(i);
886      DCHECK_EQ(p->opcode, kX86Mov32RI);
887      uint32_t target_method_idx = p->operands[2];
888      const DexFile* target_dex_file =
889          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
890
891      // The offset to patch is the last 4 bytes of the instruction.
892      int patch_offset = p->offset + p->flags.size - 4;
893      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
894                                           cu_->method_idx, cu_->invoke_type,
895                                           target_method_idx, target_dex_file,
896                                           static_cast<InvokeType>(p->operands[4]),
897                                           patch_offset);
898  }
899
900  // Handle the fixups for class types.
901  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
902      LIR* p = class_type_address_insns_.Get(i);
903      DCHECK_EQ(p->opcode, kX86Mov32RI);
904      uint32_t target_method_idx = p->operands[2];
905
906      // The offset to patch is the last 4 bytes of the instruction.
907      int patch_offset = p->offset + p->flags.size - 4;
908      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
909                                          cu_->method_idx, target_method_idx, patch_offset);
910  }
911
912  // And now the PC-relative calls to methods.
913  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
914      LIR* p = call_method_insns_.Get(i);
915      DCHECK_EQ(p->opcode, kX86CallI);
916      uint32_t target_method_idx = p->operands[1];
917      const DexFile* target_dex_file =
918          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
919
920      // The offset to patch is the last 4 bytes of the instruction.
921      int patch_offset = p->offset + p->flags.size - 4;
922      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
923                                                 cu_->method_idx, cu_->invoke_type,
924                                                 target_method_idx, target_dex_file,
925                                                 static_cast<InvokeType>(p->operands[3]),
926                                                 patch_offset, -4 /* offset */);
927  }
928
929  // And do the normal processing.
930  Mir2Lir::InstallLiteralPools();
931}
932
933/*
934 * Fast string.index_of(I) & (II).  Inline check for simple case of char <= 0xffff,
935 * otherwise bails to standard library code.
 *
 * Emits an inline search built around REPNE SCASW that uses fixed registers.
 *
 * @param info Call information; args[0] is the string object, args[1] the character and,
 *        when zero_based is false, args[2] the start index.
 * @param zero_based True when there is no start index argument, so the search begins at
 *        index 0.
 * @return false when the character is a compile-time constant above 0xFFFF (nothing is
 *         inlined and the caller has to use the real String.indexOf); true once the inline
 *         sequence has been generated.
936 */
937bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
938  ClobberCallerSave();
939  LockCallTemps();  // Using fixed registers
940
941  // EAX: 16 bit character being searched.
942  // ECX: count: number of words to be searched.
943  // EDI: String being searched.
944  // EDX: temporary during execution.
945  // EBX: temporary during execution.
946
947  RegLocation rl_obj = info->args[0];
948  RegLocation rl_char = info->args[1];
949  RegLocation rl_start;  // Note: only present in III flavor or IndexOf.
950
  // A non-constant character is represented by 0 here; it is range checked at runtime below.
951  uint32_t char_value =
952    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
953
954  if (char_value > 0xFFFF) {
955    // We have to punt to the real String.indexOf.
956    return false;
957  }
958
959  // Okay, we are committed to inlining this.
960  RegLocation rl_return = GetReturn(false);
961  RegLocation rl_dest = InlineTarget(info);
962
963  // Is the string non-NULL?
964  LoadValueDirectFixed(rl_obj, rs_rDX);
965  GenNullCheck(rs_rDX, info->opt_flags);
966  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
967
968  // Does the character fit in 16 bits?
  // slowpath_branch stays null when the character is a constant (already known to fit).
969  LIR* slowpath_branch = nullptr;
970  if (rl_char.is_const) {
971    // We need the value in EAX.
972    LoadConstantNoClobber(rs_rAX, char_value);
973  } else {
974    // Character is not a constant; compare at runtime.
    // Values above 0xFFFF branch to the slow path registered at the end of this function.
975    LoadValueDirectFixed(rl_char, rs_rAX);
976    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
977  }
978
979  // From here down, we know that we are looking for a char that fits in 16 bits.
980  // Location of reference to data array within the String object.
981  int value_offset = mirror::String::ValueOffset().Int32Value();
982  // Location of count within the String object.
983  int count_offset = mirror::String::CountOffset().Int32Value();
984  // Starting offset within data array.
985  int offset_offset = mirror::String::OffsetOffset().Int32Value();
986  // Start of char data with array_.
987  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
988
989  // Character is in EAX.
990  // Object pointer is in EDX.
991
992  // We need to preserve EDI, but have no spare registers, so push it on the stack.
993  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
994  NewLIR1(kX86Push32R, rs_rDI.GetReg());
995
996  // Compute the number of words to search in to rCX.
997  Load32Disp(rs_rDX, count_offset, rs_rCX);
  // length_compare is the branch taken when there is nothing to search; its target is the
  // "not found" label set up further down.
998  LIR *length_compare = nullptr;
999  int start_value = 0;
  // Set when a runtime start index has been pushed on the stack (on top of the saved EDI).
1000  bool is_index_on_stack = false;
1001  if (zero_based) {
1002    // We have to handle an empty string.  Use special instruction JECXZ.
1003    length_compare = NewLIR0(kX86Jecxz8);
1004  } else {
1005    rl_start = info->args[2];
1006    // We have to offset by the start index.
1007    if (rl_start.is_const) {
      // Constant start index: negative values are clamped to 0 at compile time.
1008      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
1009      start_value = std::max(start_value, 0);
1010
1011      // Is the start > count?
1012      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);
1013
1014      if (start_value != 0) {
1015        OpRegImm(kOpSub, rs_rCX, start_value);
1016      }
1017    } else {
1018      // Runtime start index.
1019      rl_start = UpdateLocTyped(rl_start, kCoreReg);
1020      if (rl_start.location == kLocPhysReg) {
1021        // Handle "start index < 0" case.
        // EBX is zeroed and conditionally moved into the start register when start < 0.
        // NOTE(review): the CMOV overwrites rl_start.reg in place — confirm that modifying
        // the register holding the start index value is intended.
1022        OpRegReg(kOpXor, rs_rBX, rs_rBX);
1023        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
1024        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);
1025
1026        // The length of the string should be greater than the start index.
1027        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
1028        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
1029        if (rl_start.reg == rs_rDI) {
1030          // The special case. We will use EDI further, so lets put start index to stack.
          // The push comes after the length_compare branch, so the "not found" path only
          // ever sees the saved EDI on the stack.
1031          NewLIR1(kX86Push32R, rs_rDI.GetReg());
1032          is_index_on_stack = true;
1033        }
1034      } else {
1035        // Load the start index from stack, remembering that we pushed EDI.
1036        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
1037        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        // Clamp a negative start index to 0, using EDI as the zero source.
1038        OpRegReg(kOpXor, rs_rDI, rs_rDI);
1039        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
1040        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
1041
1042        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
1043        OpRegReg(kOpSub, rs_rCX, rs_rBX);
1044        // Put the start index to stack.
        // EBX is reused below for the string data address, so the index is parked here.
1045        NewLIR1(kX86Push32R, rs_rBX.GetReg());
1046        is_index_on_stack = true;
1047      }
1048    }
1049  }
1050  DCHECK(length_compare != nullptr);
1051
1052  // ECX now contains the count in words to be searched.
1053
1054  // Load the address of the string into EBX.
1055  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
1056  Load32Disp(rs_rDX, value_offset, rs_rDI);
1057  Load32Disp(rs_rDX, offset_offset, rs_rBX);
1058  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);
1059
1060  // Now compute into EDI where the search will start.
  // rl_start is only assigned when zero_based is false; the short-circuit below keeps it
  // from being read otherwise.
1061  if (zero_based || rl_start.is_const) {
1062    if (start_value == 0) {
1063      OpRegCopy(rs_rDI, rs_rBX);
1064    } else {
      // Characters are 2 bytes wide, hence the scaled displacement.
1065      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
1066    }
1067  } else {
1068    if (is_index_on_stack == true) {
1069      // Load the start index from stack.
      // EDX (the object pointer) is no longer needed, so it receives the popped index.
1070      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
1071      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
1072    } else {
1073      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
1074    }
1075  }
1076
1077  // EDI now contains the start of the string to be searched.
1078  // We are all prepared to do the search for the character.
1079  NewLIR0(kX86RepneScasw);
1080
1081  // Did we find a match?
1082  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
1083
1084  // yes, we matched.  Compute the index of the result.
1085  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  // orig_ptr is EBX, the start of the character data, so the result is an index into the
  // whole string rather than relative to the start index.
1086  OpRegReg(kOpSub, rs_rDI, rs_rBX);
1087  OpRegImm(kOpAsr, rs_rDI, 1);
1088  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
1089  LIR *all_done = NewLIR1(kX86Jmp8, 0);
1090
1091  // Failed to match; return -1.
  // Both the "nothing to search" branch and the "no match" branch land here.
1092  LIR *not_found = NewLIR0(kPseudoTargetLabel);
1093  length_compare->target = not_found;
1094  failed_branch->target = not_found;
1095  LoadConstantNoClobber(rl_return.reg, -1);
1096
1097  // And join up at the end.
1098  all_done->target = NewLIR0(kPseudoTargetLabel);
1099  // Restore EDI from the stack.
1100  NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1101
1102  // Out of line code returns here.
  // The slow path branch was emitted before EDI was pushed, so the return point is placed
  // after EDI has been restored.
1103  if (slowpath_branch != nullptr) {
1104    LIR *return_point = NewLIR0(kPseudoTargetLabel);
1105    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
1106  }
1107
1108  StoreValue(rl_dest, rl_return);
1109  return true;
1110}
1111
1112/*
1113 * @brief Enter an 'advance LOC' into the FDE buffer
1114 * @param buf FDE buffer.
1115 * @param increment Amount by which to increase the current location.
1116 */
1117static void AdvanceLoc(std::vector<uint8_t>&buf, uint32_t increment) {
1118  if (increment < 64) {
1119    // Encoding in opcode.
1120    buf.push_back(0x1 << 6 | increment);
1121  } else if (increment < 256) {
1122    // Single byte delta.
1123    buf.push_back(0x02);
1124    buf.push_back(increment);
1125  } else if (increment < 256 * 256) {
1126    // Two byte delta.
1127    buf.push_back(0x03);
1128    buf.push_back(increment & 0xff);
1129    buf.push_back((increment >> 8) & 0xff);
1130  } else {
1131    // Four byte delta.
1132    buf.push_back(0x04);
1133    PushWord(buf, increment);
1134  }
1135}
1136
1137
1138std::vector<uint8_t>* X86CFIInitialization() {
1139  return X86Mir2Lir::ReturnCommonCallFrameInformation();
1140}
1141
1142std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
1143  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
1144
1145  // Length of the CIE (except for this field).
1146  PushWord(*cfi_info, 16);
1147
1148  // CIE id.
1149  PushWord(*cfi_info, 0xFFFFFFFFU);
1150
1151  // Version: 3.
1152  cfi_info->push_back(0x03);
1153
1154  // Augmentation: empty string.
1155  cfi_info->push_back(0x0);
1156
1157  // Code alignment: 1.
1158  cfi_info->push_back(0x01);
1159
1160  // Data alignment: -4.
1161  cfi_info->push_back(0x7C);
1162
1163  // Return address register (R8).
1164  cfi_info->push_back(0x08);
1165
1166  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
1167  cfi_info->push_back(0x0C);
1168  cfi_info->push_back(0x04);
1169  cfi_info->push_back(0x04);
1170
1171  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4);.
1172  cfi_info->push_back(0x2 << 6 | 0x08);
1173  cfi_info->push_back(0x01);
1174
1175  // And 2 Noops to align to 4 byte boundary.
1176  cfi_info->push_back(0x0);
1177  cfi_info->push_back(0x0);
1178
1179  DCHECK_EQ(cfi_info->size() & 3, 0U);
1180  return cfi_info;
1181}
1182
1183static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
1184  uint8_t buffer[12];
1185  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
1186  for (uint8_t *p = buffer; p < ptr; p++) {
1187    buf.push_back(*p);
1188  }
1189}
1190
1191std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
1192  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
1193
1194  // Generate the FDE for the method.
1195  DCHECK_NE(data_offset_, 0U);
1196
1197  // Length (will be filled in later in this routine).
1198  PushWord(*cfi_info, 0);
1199
1200  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
1201  // one CIE for the whole debug_frame section.
1202  PushWord(*cfi_info, 0);
1203
1204  // 'initial_location' (filled in by linker).
1205  PushWord(*cfi_info, 0);
1206
1207  // 'address_range' (number of bytes in the method).
1208  PushWord(*cfi_info, data_offset_);
1209
1210  // The instructions in the FDE.
1211  if (stack_decrement_ != nullptr) {
1212    // Advance LOC to just past the stack decrement.
1213    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
1214    AdvanceLoc(*cfi_info, pc);
1215
1216    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
1217    cfi_info->push_back(0x0e);
1218    EncodeUnsignedLeb128(*cfi_info, frame_size_);
1219
1220    // We continue with that stack until the epilogue.
1221    if (stack_increment_ != nullptr) {
1222      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
1223      AdvanceLoc(*cfi_info, new_pc - pc);
1224
1225      // We probably have code snippets after the epilogue, so save the
1226      // current state: DW_CFA_remember_state.
1227      cfi_info->push_back(0x0a);
1228
1229      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
1230      // PC on the stack now.
1231      cfi_info->push_back(0x0e);
1232      EncodeUnsignedLeb128(*cfi_info, 4);
1233
1234      // Everything after that is the same as before the epilogue.
1235      // Stack bump was followed by RET instruction.
1236      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
1237      if (post_ret_insn != nullptr) {
1238        pc = new_pc;
1239        new_pc = post_ret_insn->offset;
1240        AdvanceLoc(*cfi_info, new_pc - pc);
1241        // Restore the state: DW_CFA_restore_state.
1242        cfi_info->push_back(0x0b);
1243      }
1244    }
1245  }
1246
1247  // Padding to a multiple of 4
1248  while ((cfi_info->size() & 3) != 0) {
1249    // DW_CFA_nop is encoded as 0.
1250    cfi_info->push_back(0);
1251  }
1252
1253  // Set the length of the FDE inside the generated bytes.
1254  uint32_t length = cfi_info->size() - 4;
1255  (*cfi_info)[0] = length;
1256  (*cfi_info)[1] = length >> 8;
1257  (*cfi_info)[2] = length >> 16;
1258  (*cfi_info)[3] = length >> 24;
1259  return cfi_info;
1260}
1261
1262void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
1263  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
1264    case kMirOpConstVector:
1265      GenConst128(bb, mir);
1266      break;
1267    default:
1268      break;
1269  }
1270}
1271
1272void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
1273  int type_size = mir->dalvikInsn.vA;
1274  // We support 128 bit vectors.
1275  DCHECK_EQ(type_size & 0xFFFF, 128);
1276  int reg = mir->dalvikInsn.vB;
1277  DCHECK_LT(reg, 8);
1278  uint32_t *args = mir->dalvikInsn.arg;
1279  // Check for all 0 case.
1280  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
1281    NewLIR2(kX86XorpsRR, reg, reg);
1282    return;
1283  }
1284  // Okay, load it from the constant vector area.
1285  LIR *data_target = ScanVectorLiteral(mir);
1286  if (data_target == nullptr) {
1287    data_target = AddVectorLiteral(mir);
1288  }
1289
1290  // Address the start of the method.
1291  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
1292  rl_method = LoadValue(rl_method, kCoreReg);
1293
1294  // Load the proper value from the literal area.
1295  // We don't know the proper offset for the value, so pick one that will force
1296  // 4 byte offset.  We will fix this up in the assembler later to have the right
1297  // value.
1298  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(),  256 /* bogus */);
1299  load->flags.fixup = kFixupLoad;
1300  load->target = data_target;
1301  SetMemRefType(load, true, kLiteral);
1302}
1303
1304LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
1305  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
1306  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
1307    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
1308        args[2] == p->operands[2] && args[3] == p->operands[3]) {
1309      return p;
1310    }
1311  }
1312  return nullptr;
1313}
1314
1315LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
1316  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
1317  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
1318  new_value->operands[0] = args[0];
1319  new_value->operands[1] = args[1];
1320  new_value->operands[2] = args[2];
1321  new_value->operands[3] = args[3];
1322  new_value->next = const_vectors_;
1323  if (const_vectors_ == nullptr) {
1324    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
1325  }
1326  estimated_native_code_size_ += 16;  // Space for one vector.
1327  const_vectors_ = new_value;
1328  return new_value;
1329}
1330
1331}  // namespace art
1332