target_x86.cc revision 0999a6f7c83d10aa59b75f079f0d2fdbac982cf7
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <string>
18#include <inttypes.h>
19
20#include "codegen_x86.h"
21#include "dex/compiler_internals.h"
22#include "dex/quick/mir_to_lir-inl.h"
23#include "mirror/array.h"
24#include "mirror/string.h"
25#include "x86_lir.h"
26
27namespace art {
28
29static const RegStorage core_regs_arr_32[] = {
30    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
31};
32static const RegStorage core_regs_arr_64[] = {
33    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
34#ifdef TARGET_REX_SUPPORT
35    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
36#endif
37};
38static const RegStorage core_regs_arr_64q[] = {
39    rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
40#ifdef TARGET_REX_SUPPORT
41    rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
42#endif
43};
44static const RegStorage sp_regs_arr_32[] = {
45    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
46};
47static const RegStorage sp_regs_arr_64[] = {
48    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
49#ifdef TARGET_REX_SUPPORT
50    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
51#endif
52};
53static const RegStorage dp_regs_arr_32[] = {
54    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
55};
56static const RegStorage dp_regs_arr_64[] = {
57    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
58#ifdef TARGET_REX_SUPPORT
59    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
60#endif
61};
62static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
63static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
64static const RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
65static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
66static const RegStorage core_temps_arr_64[] = {
67    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
68#ifdef TARGET_REX_SUPPORT
69    rs_r8, rs_r9, rs_r10, rs_r11
70#endif
71};
72static const RegStorage core_temps_arr_64q[] = {
73    rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
74#ifdef TARGET_REX_SUPPORT
75    rs_r8q, rs_r9q, rs_r10q, rs_r11q
76#endif
77};
78static const RegStorage sp_temps_arr_32[] = {
79    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
80};
81static const RegStorage sp_temps_arr_64[] = {
82    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
83#ifdef TARGET_REX_SUPPORT
84    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
85#endif
86};
87static const RegStorage dp_temps_arr_32[] = {
88    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
89};
90static const RegStorage dp_temps_arr_64[] = {
91    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
92#ifdef TARGET_REX_SUPPORT
93    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
94#endif
95};
96
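// The raw register arrays above are wrapped in std::vectors below; these vectors are
// handed to the RegisterPool constructor in CompilerInitializeRegAlloc().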
97static const std::vector<RegStorage> empty_pool;
98static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
99    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
100static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
101    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
102static const std::vector<RegStorage> core_regs_64q(core_regs_arr_64q,
103    core_regs_arr_64q + sizeof(core_regs_arr_64q) / sizeof(core_regs_arr_64q[0]));
104static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
105    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
106static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
107    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
108static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
109    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
110static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
111    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
112static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
113    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
114static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
115    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
116static const std::vector<RegStorage> reserved_regs_64q(reserved_regs_arr_64q,
117    reserved_regs_arr_64q + sizeof(reserved_regs_arr_64q) / sizeof(reserved_regs_arr_64q[0]));
118static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
119    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
120static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
121    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
122static const std::vector<RegStorage> core_temps_64q(core_temps_arr_64q,
123    core_temps_arr_64q + sizeof(core_temps_arr_64q) / sizeof(core_temps_arr_64q[0]));
124static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
125    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
126static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
127    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
128static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
129    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
130static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
131    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
132
133RegStorage rs_rX86_SP;
134
135X86NativeRegisterPool rX86_ARG0;
136X86NativeRegisterPool rX86_ARG1;
137X86NativeRegisterPool rX86_ARG2;
138X86NativeRegisterPool rX86_ARG3;
139X86NativeRegisterPool rX86_FARG0;
140X86NativeRegisterPool rX86_FARG1;
141X86NativeRegisterPool rX86_FARG2;
142X86NativeRegisterPool rX86_FARG3;
143X86NativeRegisterPool rX86_RET0;
144X86NativeRegisterPool rX86_RET1;
145X86NativeRegisterPool rX86_INVOKE_TGT;
146X86NativeRegisterPool rX86_COUNT;
147
148RegStorage rs_rX86_ARG0;
149RegStorage rs_rX86_ARG1;
150RegStorage rs_rX86_ARG2;
151RegStorage rs_rX86_ARG3;
152RegStorage rs_rX86_FARG0;
153RegStorage rs_rX86_FARG1;
154RegStorage rs_rX86_FARG2;
155RegStorage rs_rX86_FARG3;
156RegStorage rs_rX86_RET0;
157RegStorage rs_rX86_RET1;
158RegStorage rs_rX86_INVOKE_TGT;
159RegStorage rs_rX86_COUNT;
160
161RegLocation X86Mir2Lir::LocCReturn() {
162  return x86_loc_c_return;
163}
164
165RegLocation X86Mir2Lir::LocCReturnWide() {
166  return x86_loc_c_return_wide;
167}
168
169RegLocation X86Mir2Lir::LocCReturnFloat() {
170  return x86_loc_c_return_float;
171}
172
173RegLocation X86Mir2Lir::LocCReturnDouble() {
174  return x86_loc_c_return_double;
175}
176
177// Return a target-dependent special register.
178RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
179  RegStorage res_reg = RegStorage::InvalidReg();
180  switch (reg) {
181    case kSelf: res_reg = RegStorage::InvalidReg(); break;
182    case kSuspend: res_reg =  RegStorage::InvalidReg(); break;
183    case kLr: res_reg =  RegStorage::InvalidReg(); break;
184    case kPc: res_reg =  RegStorage::InvalidReg(); break;
185    case kSp: res_reg =  rs_rX86_SP; break;
186    case kArg0: res_reg = rs_rX86_ARG0; break;
187    case kArg1: res_reg = rs_rX86_ARG1; break;
188    case kArg2: res_reg = rs_rX86_ARG2; break;
189    case kArg3: res_reg = rs_rX86_ARG3; break;
190    case kFArg0: res_reg = rs_rX86_FARG0; break;
191    case kFArg1: res_reg = rs_rX86_FARG1; break;
192    case kFArg2: res_reg = rs_rX86_FARG2; break;
193    case kFArg3: res_reg = rs_rX86_FARG3; break;
194    case kRet0: res_reg = rs_rX86_RET0; break;
195    case kRet1: res_reg = rs_rX86_RET1; break;
196    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
197    case kHiddenArg: res_reg = rs_rAX; break;
198    case kHiddenFpArg: res_reg = rs_fr0; break;
199    case kCount: res_reg = rs_rX86_COUNT; break;
200  }
201  return res_reg;
202}
203
204RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
205  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
206  // TODO: This is not 64-bit compliant and depends on new internal ABI.
207  switch (arg_num) {
208    case 0:
209      return rs_rX86_ARG1;
210    case 1:
211      return rs_rX86_ARG2;
212    case 2:
213      return rs_rX86_ARG3;
214    default:
215      return RegStorage::InvalidReg();
216  }
217}
218
219/*
220 * Decode the register id.
221 */
222uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
223  uint64_t seed;
224  int shift;
225  int reg_id;
226
227  reg_id = reg.GetRegNum();
228  /* Double registers in x86 are just a single FP register */
229  seed = 1;
230  /* FP register starts at bit position 16 */
231  shift = reg.IsFloat() ? kX86FPReg0 : 0;
232  /* Expand the double register id into single offset */
233  shift += reg_id;
234  return (seed << shift);
235}
236
237uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
238  /*
239   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
240   * able to clean up some of the x86/Arm_Mips differences
241   */
242  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
243  return 0ULL;
244}
245
246void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
247  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
248  DCHECK(!lir->flags.use_def_invalid);
249
250  // X86-specific resource map setup here.
251  if (flags & REG_USE_SP) {
252    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
253  }
254
255  if (flags & REG_DEF_SP) {
256    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
257  }
258
259  if (flags & REG_DEFA) {
260    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
261  }
262
263  if (flags & REG_DEFD) {
264    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
265  }
266  if (flags & REG_USEA) {
267    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
268  }
269
270  if (flags & REG_USEC) {
271    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
272  }
273
274  if (flags & REG_USED) {
275    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
276  }
277
278  if (flags & REG_USEB) {
279    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
280  }
281
282  // Fix up a hard-to-describe instruction: uses rAX, rCX, and rDI; sets rDI.
283  if (lir->opcode == kX86RepneScasw) {
284    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
285    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
286    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
287    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
288  }
289
290  if (flags & USE_FP_STACK) {
291    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
292    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
293  }
294}
295
296/* For dumping instructions */
297static const char* x86RegName[] = {
298  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
299  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
300};
301
302static const char* x86CondName[] = {
303  "O",
304  "NO",
305  "B/NAE/C",
306  "NB/AE/NC",
307  "Z/EQ",
308  "NZ/NE",
309  "BE/NA",
310  "NBE/A",
311  "S",
312  "NS",
313  "P/PE",
314  "NP/PO",
315  "L/NGE",
316  "NL/GE",
317  "LE/NG",
318  "NLE/G"
319};
320
321/*
322 * Interpret a format string and build a string no longer than size
323 * See format key in Assemble.cc.
324 */
325std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
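  // Literal characters are copied through; '!' starts a two-character escape: an operand
  // index digit followed by a format character (see the format key in Assemble.cc).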
326  std::string buf;
327  size_t i = 0;
328  size_t fmt_len = strlen(fmt);
329  while (i < fmt_len) {
330    if (fmt[i] != '!') {
331      buf += fmt[i];
332      i++;
333    } else {
334      i++;
335      DCHECK_LT(i, fmt_len);
336      char operand_number_ch = fmt[i];
337      i++;
338      if (operand_number_ch == '!') {
339        buf += "!";
340      } else {
341        int operand_number = operand_number_ch - '0';
342        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
343        DCHECK_LT(i, fmt_len);
344        int operand = lir->operands[operand_number];
345        switch (fmt[i]) {
346          case 'c':
347            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
348            buf += x86CondName[operand];
349            break;
350          case 'd':
351            buf += StringPrintf("%d", operand);
352            break;
353          case 'p': {
354            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
355            buf += StringPrintf("0x%08x", tab_rec->offset);
356            break;
357          }
358          case 'r':
359            if (RegStorage::IsFloat(operand)) {
360              int fp_reg = RegStorage::RegNum(operand);
361              buf += StringPrintf("xmm%d", fp_reg);
362            } else {
363              int reg_num = RegStorage::RegNum(operand);
364              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
365              buf += x86RegName[reg_num];
366            }
367            break;
368          case 't':
369            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
370                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
371                                lir->target);
372            break;
373          default:
374            buf += StringPrintf("DecodeError '%c'", fmt[i]);
375            break;
376        }
377        i++;
378      }
379    }
380  }
381  return buf;
382}
383
384void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
385  char buf[256];
386  buf[0] = 0;
387
388  if (mask == ENCODE_ALL) {
389    strcpy(buf, "all");
390  } else {
391    char num[8];
392    int i;
393
394    for (i = 0; i < kX86RegEnd; i++) {
395      if (mask & (1ULL << i)) {
396        snprintf(num, arraysize(num), "%d ", i);
397        strcat(buf, num);
398      }
399    }
400
401    if (mask & ENCODE_CCODE) {
402      strcat(buf, "cc ");
403    }
404    /* Memory bits */
405    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
406      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
407               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
408               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
409    }
410    if (mask & ENCODE_LITERAL) {
411      strcat(buf, "lit ");
412    }
413
414    if (mask & ENCODE_HEAP_REF) {
415      strcat(buf, "heap ");
416    }
417    if (mask & ENCODE_MUST_NOT_ALIAS) {
418      strcat(buf, "noalias ");
419    }
420  }
421  if (buf[0]) {
422    LOG(INFO) << prefix << ": " <<  buf;
423  }
424}
425
426void X86Mir2Lir::AdjustSpillMask() {
427  // x86 has no LR to spill; instead, add the fake return address register to the spill
427  // mask so the frame layout accounts for the return address pushed by the call.
428  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
429  num_core_spills_++;
430}
431
432/*
433 * Mark a callee-save fp register as promoted.  Note that
434 * vpush/vpop uses contiguous register lists so we must
435 * include any holes in the mask.  Associate holes with
436 * Dalvik register INVALID_VREG (0xFFFFU).
437 */
438void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
439  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
440}
441
442void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
443  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
444}
445
446RegStorage X86Mir2Lir::AllocateByteRegister() {
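  // A register with an 8-bit addressable form is requested; this currently just returns a core temp.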
447  return AllocTypedTemp(false, kCoreReg);
448}
449
450/* Clobber all regs that might be used by an external C call */
451void X86Mir2Lir::ClobberCallerSave() {
452  Clobber(rs_rAX);
453  Clobber(rs_rCX);
454  Clobber(rs_rDX);
455  Clobber(rs_rBX);
456}
457
458RegLocation X86Mir2Lir::GetReturnWideAlt() {
459  RegLocation res = LocCReturnWide();
460  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
461  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
462  Clobber(rs_rAX);
463  Clobber(rs_rDX);
464  MarkInUse(rs_rAX);
465  MarkInUse(rs_rDX);
466  MarkWide(res.reg);
467  return res;
468}
469
470RegLocation X86Mir2Lir::GetReturnAlt() {
471  RegLocation res = LocCReturn();
472  res.reg.SetReg(rs_rDX.GetReg());
473  Clobber(rs_rDX);
474  MarkInUse(rs_rDX);
475  return res;
476}
477
478/* To be used when explicitly managing register use */
479void X86Mir2Lir::LockCallTemps() {
480  LockTemp(rs_rX86_ARG0);
481  LockTemp(rs_rX86_ARG1);
482  LockTemp(rs_rX86_ARG2);
483  LockTemp(rs_rX86_ARG3);
484}
485
486/* To be used when explicitly managing register use */
487void X86Mir2Lir::FreeCallTemps() {
488  FreeTemp(rs_rX86_ARG0);
489  FreeTemp(rs_rX86_ARG1);
490  FreeTemp(rs_rX86_ARG2);
491  FreeTemp(rs_rX86_ARG3);
492}
493
494bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
495    switch (opcode) {
496      case kX86LockCmpxchgMR:
497      case kX86LockCmpxchgAR:
498      case kX86LockCmpxchg8bM:
499      case kX86LockCmpxchg8bA:
500      case kX86XchgMR:
501      case kX86Mfence:
502        // Atomic memory instructions provide full barrier.
503        return true;
504      default:
505        break;
506    }
507
508    // Be conservative if we cannot prove that it provides a full barrier.
509    return false;
510}
511
512bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
513#if ANDROID_SMP != 0
514  // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
515  LIR* mem_barrier = last_lir_insn_;
516
517  bool ret = false;
518  /*
519   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
520   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
521   * to ensure is that there is a scheduling barrier in place.
522   */
523  if (barrier_kind == kStoreLoad) {
524    // If no LIR already exists that can be used as a barrier, then generate an mfence.
525    if (mem_barrier == nullptr) {
526      mem_barrier = NewLIR0(kX86Mfence);
527      ret = true;
528    }
529
530    // If last instruction does not provide full barrier, then insert an mfence.
531    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
532      mem_barrier = NewLIR0(kX86Mfence);
533      ret = true;
534    }
535  }
536
537  // Now ensure that a scheduling barrier is in place.
538  if (mem_barrier == nullptr) {
539    GenBarrier();
540  } else {
541    // Mark as a scheduling barrier.
542    DCHECK(!mem_barrier->flags.use_def_invalid);
543    mem_barrier->u.m.def_mask = ENCODE_ALL;
544  }
545  return ret;
546#else
547  return false;
548#endif
549}
550
551void X86Mir2Lir::CompilerInitializeRegAlloc() {
552  if (Gen64Bit()) {
553    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, empty_pool/*core_regs_64q*/, sp_regs_64,
554                                          dp_regs_64, reserved_regs_64, empty_pool/*reserved_regs_64q*/,
555                                          core_temps_64, empty_pool/*core_temps_64q*/, sp_temps_64, dp_temps_64);
556  } else {
557    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
558                                          dp_regs_32, reserved_regs_32, empty_pool,
559                                          core_temps_32, empty_pool, sp_temps_32, dp_temps_32);
560  }
561
562  // Target-specific adjustments.
563
564  // Alias single precision xmm to double xmms.
565  // TODO: as needed, add larger vector sizes - alias all to the largest.
566  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
567  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
568    int sp_reg_num = info->GetReg().GetRegNum();
569    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
570    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
571    // 64-bit xmm vector register's master storage should refer to itself.
572    DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
573    // Redirect 32-bit vector's master storage to 64-bit vector.
574    info->SetMaster(dp_reg_info);
575  }
576
577  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
578  // TODO: adjust for x86/hard float calling convention.
579  reg_pool_->next_core_reg_ = 2;
580  reg_pool_->next_sp_reg_ = 2;
581  reg_pool_->next_dp_reg_ = 1;
582}
583
584void X86Mir2Lir::SpillCoreRegs() {
585  if (num_core_spills_ == 0) {
586    return;
587  }
588  // Spill mask not including fake return address register
589  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
590  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
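  // Core spills occupy the top of the frame; walk the mask from the lowest register number upward.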
591  for (int reg = 0; mask; mask >>= 1, reg++) {
592    if (mask & 0x1) {
593      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
594      offset += GetInstructionSetPointerSize(cu_->instruction_set);
595    }
596  }
597}
598
599void X86Mir2Lir::UnSpillCoreRegs() {
600  if (num_core_spills_ == 0) {
601    return;
602  }
603  // Spill mask not including fake return address register
604  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
605  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
606  for (int reg = 0; mask; mask >>= 1, reg++) {
607    if (mask & 0x1) {
608      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
609      offset += GetInstructionSetPointerSize(cu_->instruction_set);
610    }
611  }
612}
613
614bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
615  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
616}
617
618bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
619  return true;
620}
621
622RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
623  if (UNLIKELY(is_volatile)) {
624    // On x86, atomic 64-bit load/store requires an fp register.
625    // Smaller aligned load/store is atomic for both core and fp registers.
626    if (size == k64 || size == kDouble) {
627      return kFPReg;
628    }
629  }
630  return RegClassBySize(size);
631}
632
633X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
634    : Mir2Lir(cu, mir_graph, arena),
635      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
636      method_address_insns_(arena, 100, kGrowableArrayMisc),
637      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
638      call_method_insns_(arena, 100, kGrowableArrayMisc),
639      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
640      const_vectors_(nullptr) {
641  store_method_addr_used_ = false;
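  // Sanity check: each EncodingMap entry must be stored at the index of its own opcode.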
642  if (kIsDebugBuild) {
643    for (int i = 0; i < kX86Last; i++) {
644      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
645        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
646                   << " is wrong: expecting " << i << ", seeing "
647                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
648      }
649    }
650  }
651  if (Gen64Bit()) {
652    rs_rX86_SP = rs_rX86_SP_64;
653
654    rs_rX86_ARG0 = rs_rDI;
655    rs_rX86_ARG1 = rs_rSI;
656    rs_rX86_ARG2 = rs_rDX;
657    rs_rX86_ARG3 = rs_rCX;
658    rX86_ARG0 = rDI;
659    rX86_ARG1 = rSI;
660    rX86_ARG2 = rDX;
661    rX86_ARG3 = rCX;
662    // TODO: ARG4(r8), ARG5(r9), floating point args.
663  } else {
664    rs_rX86_SP = rs_rX86_SP_32;
665
666    rs_rX86_ARG0 = rs_rAX;
667    rs_rX86_ARG1 = rs_rCX;
668    rs_rX86_ARG2 = rs_rDX;
669    rs_rX86_ARG3 = rs_rBX;
670    rX86_ARG0 = rAX;
671    rX86_ARG1 = rCX;
672    rX86_ARG2 = rDX;
673    rX86_ARG3 = rBX;
674  }
675  rs_rX86_FARG0 = rs_rAX;
676  rs_rX86_FARG1 = rs_rCX;
677  rs_rX86_FARG2 = rs_rDX;
678  rs_rX86_FARG3 = rs_rBX;
679  rs_rX86_RET0 = rs_rAX;
680  rs_rX86_RET1 = rs_rDX;
681  rs_rX86_INVOKE_TGT = rs_rAX;
682  rs_rX86_COUNT = rs_rCX;
683  rX86_FARG0 = rAX;
684  rX86_FARG1 = rCX;
685  rX86_FARG2 = rDX;
686  rX86_FARG3 = rBX;
687  rX86_RET0 = rAX;
688  rX86_RET1 = rDX;
689  rX86_INVOKE_TGT = rAX;
690  rX86_COUNT = rCX;
691}
692
693Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
694                          ArenaAllocator* const arena) {
695  return new X86Mir2Lir(cu, mir_graph, arena, false);
696}
697
698Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
699                          ArenaAllocator* const arena) {
700  return new X86Mir2Lir(cu, mir_graph, arena, true);
701}
702
703// Not used in x86
704RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
705  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
706  return RegStorage::InvalidReg();
707}
708
709// Not used in x86
710RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
711  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
712  return RegStorage::InvalidReg();
713}
714
715LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
716  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
717  return nullptr;
718}
719
720uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
721  DCHECK(!IsPseudoLirOp(opcode));
722  return X86Mir2Lir::EncodingMap[opcode].flags;
723}
724
725const char* X86Mir2Lir::GetTargetInstName(int opcode) {
726  DCHECK(!IsPseudoLirOp(opcode));
727  return X86Mir2Lir::EncodingMap[opcode].name;
728}
729
730const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
731  DCHECK(!IsPseudoLirOp(opcode));
732  return X86Mir2Lir::EncodingMap[opcode].fmt;
733}
734
735void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
736  // Can we do this directly to memory?
737  rl_dest = UpdateLocWide(rl_dest);
738  if ((rl_dest.location == kLocDalvikFrame) ||
739      (rl_dest.location == kLocCompilerTemp)) {
740    int32_t val_lo = Low32Bits(value);
741    int32_t val_hi = High32Bits(value);
742    int r_base = TargetReg(kSp).GetReg();
743    int displacement = SRegOffset(rl_dest.s_reg_low);
744
745    LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
746    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
747                              false /* is_load */, true /* is64bit */);
748    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
749    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
750                              false /* is_load */, true /* is64bit */);
751    return;
752  }
753
754  // Just use the standard code to do the generation.
755  Mir2Lir::GenConstWide(rl_dest, value);
756}
757
758// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
759void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
760  LOG(INFO)  << "location: " << loc.location << ','
761             << (loc.wide ? " w" : "  ")
762             << (loc.defined ? " D" : "  ")
763             << (loc.is_const ? " c" : "  ")
764             << (loc.fp ? " F" : "  ")
765             << (loc.core ? " C" : "  ")
766             << (loc.ref ? " r" : "  ")
767             << (loc.high_word ? " h" : "  ")
768             << (loc.home ? " H" : "  ")
769             << ", low: " << static_cast<int>(loc.reg.GetLowReg())
770             << ", high: " << static_cast<int>(loc.reg.GetHighReg())
771             << ", s_reg: " << loc.s_reg_low
772             << ", orig: " << loc.orig_sreg;
773}
774
775void X86Mir2Lir::Materialize() {
776  // A good place to put the analysis before starting.
777  AnalyzeMIR();
778
779  // Now continue with regular code generation.
780  Mir2Lir::Materialize();
781}
782
783void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
784                                   SpecialTargetRegister symbolic_reg) {
785  /*
786   * For x86, just generate a 32-bit move-immediate instruction that will be filled
787   * in at 'link time'.  For now, put a unique value based on the target to ensure that
788   * code deduplication works.
789   */
790  int target_method_idx = target_method.dex_method_index;
791  const DexFile* target_dex_file = target_method.dex_file;
792  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
793  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
794
795  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
796  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
797                     static_cast<int>(target_method_id_ptr), target_method_idx,
798                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
799  AppendLIR(move);
800  method_address_insns_.Insert(move);
801}
802
803void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
804  /*
805   * For x86, just generate a 32-bit move-immediate instruction that will be filled
806   * in at 'link time'.  For now, put a unique value based on the target to ensure that
807   * code deduplication works.
808   */
809  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
810  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
811
812  // Generate the move instruction with the unique pointer and save index and type.
813  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
814                     static_cast<int>(ptr), type_idx);
815  AppendLIR(move);
816  class_type_address_insns_.Insert(move);
817}
818
819LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
820  /*
821   * For x86, just generate a 32-bit call-relative instruction that will be filled
822   * in at 'link time'.  For now, put a unique value based on the target to ensure that
823   * code deduplication works.
824   */
825  int target_method_idx = target_method.dex_method_index;
826  const DexFile* target_dex_file = target_method.dex_file;
827  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
828  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
829
830  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
831  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
832                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
833  AppendLIR(call);
834  call_method_insns_.Insert(call);
835  return call;
836}
837
838/*
839 * @brief Enter a 32 bit quantity into a buffer
840 * @param buf buffer.
841 * @param data Data value.
842 */
843
844static void PushWord(std::vector<uint8_t>& buf, int32_t data) {
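  // Append the four bytes of 'data' in little-endian order.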
845  buf.push_back(data & 0xff);
846  buf.push_back((data >> 8) & 0xff);
847  buf.push_back((data >> 16) & 0xff);
848  buf.push_back((data >> 24) & 0xff);
849}
850
851void X86Mir2Lir::InstallLiteralPools() {
852  // These are handled differently for x86.
853  DCHECK(code_literal_list_ == nullptr);
854  DCHECK(method_literal_list_ == nullptr);
855  DCHECK(class_literal_list_ == nullptr);
856
857  // Align to 16 byte boundary.  We have implicit knowledge that the start of the method is
858  // on a 4 byte boundary.   How can I check this if it changes (other than aligned loads
859  // will fail at runtime)?
860  if (const_vectors_ != nullptr) {
861    int align_size = (16-4) - (code_buffer_.size() & 0xF);
862    if (align_size < 0) {
863      align_size += 16;
864    }
865
866    while (align_size > 0) {
867      code_buffer_.push_back(0);
868      align_size--;
869    }
870    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
871      PushWord(code_buffer_, p->operands[0]);
872      PushWord(code_buffer_, p->operands[1]);
873      PushWord(code_buffer_, p->operands[2]);
874      PushWord(code_buffer_, p->operands[3]);
875    }
876  }
877
878  // Handle the fixups for methods.
879  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
880      LIR* p = method_address_insns_.Get(i);
881      DCHECK_EQ(p->opcode, kX86Mov32RI);
882      uint32_t target_method_idx = p->operands[2];
883      const DexFile* target_dex_file =
884          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
885
886      // The offset to patch is the last 4 bytes of the instruction.
887      int patch_offset = p->offset + p->flags.size - 4;
888      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
889                                           cu_->method_idx, cu_->invoke_type,
890                                           target_method_idx, target_dex_file,
891                                           static_cast<InvokeType>(p->operands[4]),
892                                           patch_offset);
893  }
894
895  // Handle the fixups for class types.
896  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
897      LIR* p = class_type_address_insns_.Get(i);
898      DCHECK_EQ(p->opcode, kX86Mov32RI);
899      uint32_t target_method_idx = p->operands[2];
900
901      // The offset to patch is the last 4 bytes of the instruction.
902      int patch_offset = p->offset + p->flags.size - 4;
903      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
904                                          cu_->method_idx, target_method_idx, patch_offset);
905  }
906
907  // And now the PC-relative calls to methods.
908  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
909      LIR* p = call_method_insns_.Get(i);
910      DCHECK_EQ(p->opcode, kX86CallI);
911      uint32_t target_method_idx = p->operands[1];
912      const DexFile* target_dex_file =
913          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
914
915      // The offset to patch is the last 4 bytes of the instruction.
916      int patch_offset = p->offset + p->flags.size - 4;
917      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
918                                                 cu_->method_idx, cu_->invoke_type,
919                                                 target_method_idx, target_dex_file,
920                                                 static_cast<InvokeType>(p->operands[3]),
921                                                 patch_offset, -4 /* offset */);
922  }
923
924  // And do the normal processing.
925  Mir2Lir::InstallLiteralPools();
926}
927
928/*
929 * Fast String.indexOf(I) and (II).  Inlines the simple case of char <= 0xffff;
930 * otherwise bails to the standard library code.
931 */
932bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
933  ClobberCallerSave();
934  LockCallTemps();  // Using fixed registers
935
936  // EAX: 16 bit character being searched.
937  // ECX: count: number of words to be searched.
938  // EDI: String being searched.
939  // EDX: temporary during execution.
940  // EBX: temporary during execution.
941
942  RegLocation rl_obj = info->args[0];
943  RegLocation rl_char = info->args[1];
944  RegLocation rl_start;  // Note: only present in the (III) flavor of indexOf.
945
946  uint32_t char_value =
947    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
948
949  if (char_value > 0xFFFF) {
950    // We have to punt to the real String.indexOf.
951    return false;
952  }
953
954  // Okay, we are committed to inlining this.
955  RegLocation rl_return = GetReturn(false);
956  RegLocation rl_dest = InlineTarget(info);
957
958  // Is the string non-NULL?
959  LoadValueDirectFixed(rl_obj, rs_rDX);
960  GenNullCheck(rs_rDX, info->opt_flags);
961  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
962
963  // Does the character fit in 16 bits?
964  LIR* slowpath_branch = nullptr;
965  if (rl_char.is_const) {
966    // We need the value in EAX.
967    LoadConstantNoClobber(rs_rAX, char_value);
968  } else {
969    // Character is not a constant; compare at runtime.
970    LoadValueDirectFixed(rl_char, rs_rAX);
971    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
972  }
973
974  // From here down, we know that we are looking for a char that fits in 16 bits.
975  // Location of reference to data array within the String object.
976  int value_offset = mirror::String::ValueOffset().Int32Value();
977  // Location of count within the String object.
978  int count_offset = mirror::String::CountOffset().Int32Value();
979  // Starting offset within data array.
980  int offset_offset = mirror::String::OffsetOffset().Int32Value();
981  // Start of the char data within the array.
982  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
983
984  // Character is in EAX.
985  // Object pointer is in EDX.
986
987  // We need to preserve EDI, but have no spare registers, so push it on the stack.
988  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
989  NewLIR1(kX86Push32R, rs_rDI.GetReg());
990
991  // Compute the number of words to search and place it in rCX.
992  Load32Disp(rs_rDX, count_offset, rs_rCX);
993  LIR *length_compare = nullptr;
994  int start_value = 0;
995  bool is_index_on_stack = false;
996  if (zero_based) {
997    // We have to handle an empty string.  Use special instruction JECXZ.
998    length_compare = NewLIR0(kX86Jecxz8);
999  } else {
1000    rl_start = info->args[2];
1001    // We have to offset by the start index.
1002    if (rl_start.is_const) {
1003      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
1004      start_value = std::max(start_value, 0);
1005
1006      // Is the start > count?
1007      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);
1008
1009      if (start_value != 0) {
1010        OpRegImm(kOpSub, rs_rCX, start_value);
1011      }
1012    } else {
1013      // Runtime start index.
1014      rl_start = UpdateLocTyped(rl_start, kCoreReg);
1015      if (rl_start.location == kLocPhysReg) {
1016        // Handle "start index < 0" case.
1017        OpRegReg(kOpXor, rs_rBX, rs_rBX);
1018        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
1019        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);
1020
1021        // The length of the string should be greater than the start index.
1022        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
1023        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
1024        if (rl_start.reg == rs_rDI) {
1025          // Special case: we will use EDI later, so put the start index on the stack.
1026          NewLIR1(kX86Push32R, rs_rDI.GetReg());
1027          is_index_on_stack = true;
1028        }
1029      } else {
1030        // Load the start index from stack, remembering that we pushed EDI.
1031        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
1032        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
1033        OpRegReg(kOpXor, rs_rDI, rs_rDI);
1034        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
1035        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
1036
1037        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
1038        OpRegReg(kOpSub, rs_rCX, rs_rBX);
1039        // Put the start index on the stack.
1040        NewLIR1(kX86Push32R, rs_rBX.GetReg());
1041        is_index_on_stack = true;
1042      }
1043    }
1044  }
1045  DCHECK(length_compare != nullptr);
1046
1047  // ECX now contains the count in words to be searched.
1048
1049  // Load the address of the string into EBX.
1050  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
1051  Load32Disp(rs_rDX, value_offset, rs_rDI);
1052  Load32Disp(rs_rDX, offset_offset, rs_rBX);
1053  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);
1054
1055  // Now compute into EDI where the search will start.
1056  if (zero_based || rl_start.is_const) {
1057    if (start_value == 0) {
1058      OpRegCopy(rs_rDI, rs_rBX);
1059    } else {
1060      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
1061    }
1062  } else {
1063    if (is_index_on_stack) {
1064      // Load the start index from stack.
1065      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
1066      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
1067    } else {
1068      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
1069    }
1070  }
1071
1072  // EDI now contains the start of the string to be searched.
1073  // We are all prepared to do the search for the character.
1074  NewLIR0(kX86RepneScasw);
1075
1076  // Did we find a match?
1077  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
1078
1079  // Yes, we matched.  Compute the index of the result.
1080  // index = ((curr_ptr - orig_ptr) / 2) - 1.
1081  OpRegReg(kOpSub, rs_rDI, rs_rBX);
1082  OpRegImm(kOpAsr, rs_rDI, 1);
1083  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
1084  LIR *all_done = NewLIR1(kX86Jmp8, 0);
1085
1086  // Failed to match; return -1.
1087  LIR *not_found = NewLIR0(kPseudoTargetLabel);
1088  length_compare->target = not_found;
1089  failed_branch->target = not_found;
1090  LoadConstantNoClobber(rl_return.reg, -1);
1091
1092  // And join up at the end.
1093  all_done->target = NewLIR0(kPseudoTargetLabel);
1094  // Restore EDI from the stack.
1095  NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1096
1097  // Out of line code returns here.
1098  if (slowpath_branch != nullptr) {
1099    LIR *return_point = NewLIR0(kPseudoTargetLabel);
1100    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
1101  }
1102
1103  StoreValue(rl_dest, rl_return);
1104  return true;
1105}
1106
1107/*
1108 * @brief Enter an 'advance LOC' into the FDE buffer
1109 * @param buf FDE buffer.
1110 * @param increment Amount by which to increase the current location.
1111 */
1112static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
1113  if (increment < 64) {
1114    // Encoding in opcode.
1115    buf.push_back(0x1 << 6 | increment);
1116  } else if (increment < 256) {
1117    // Single byte delta.
1118    buf.push_back(0x02);
1119    buf.push_back(increment);
1120  } else if (increment < 256 * 256) {
1121    // Two byte delta.
1122    buf.push_back(0x03);
1123    buf.push_back(increment & 0xff);
1124    buf.push_back((increment >> 8) & 0xff);
1125  } else {
1126    // Four byte delta.
1127    buf.push_back(0x04);
1128    PushWord(buf, increment);
1129  }
1130}
1131
1132
1133std::vector<uint8_t>* X86CFIInitialization() {
1134  return X86Mir2Lir::ReturnCommonCallFrameInformation();
1135}
1136
1137std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
1138  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
1139
1140  // Length of the CIE (except for this field).
1141  PushWord(*cfi_info, 16);
1142
1143  // CIE id.
1144  PushWord(*cfi_info, 0xFFFFFFFFU);
1145
1146  // Version: 3.
1147  cfi_info->push_back(0x03);
1148
1149  // Augmentation: empty string.
1150  cfi_info->push_back(0x0);
1151
1152  // Code alignment: 1.
1153  cfi_info->push_back(0x01);
1154
1155  // Data alignment: -4.
1156  cfi_info->push_back(0x7C);
1157
1158  // Return address register (R8).
1159  cfi_info->push_back(0x08);
1160
1161  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
1162  cfi_info->push_back(0x0C);
1163  cfi_info->push_back(0x04);
1164  cfi_info->push_back(0x04);
1165
1166  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4);.
1167  cfi_info->push_back(0x2 << 6 | 0x08);
1168  cfi_info->push_back(0x01);
1169
1170  // And 2 Noops to align to 4 byte boundary.
1171  cfi_info->push_back(0x0);
1172  cfi_info->push_back(0x0);
1173
1174  DCHECK_EQ(cfi_info->size() & 3, 0U);
1175  return cfi_info;
1176}
1177
1178static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
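  // Encode 'value' as ULEB128 into a scratch buffer, then append the resulting bytes to 'buf'.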
1179  uint8_t buffer[12];
1180  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
1181  for (uint8_t *p = buffer; p < ptr; p++) {
1182    buf.push_back(*p);
1183  }
1184}
1185
1186std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
1187  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
1188
1189  // Generate the FDE for the method.
1190  DCHECK_NE(data_offset_, 0U);
1191
1192  // Length (will be filled in later in this routine).
1193  PushWord(*cfi_info, 0);
1194
1195  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
1196  // one CIE for the whole debug_frame section.
1197  PushWord(*cfi_info, 0);
1198
1199  // 'initial_location' (filled in by linker).
1200  PushWord(*cfi_info, 0);
1201
1202  // 'address_range' (number of bytes in the method).
1203  PushWord(*cfi_info, data_offset_);
1204
1205  // The instructions in the FDE.
1206  if (stack_decrement_ != nullptr) {
1207    // Advance LOC to just past the stack decrement.
1208    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
1209    AdvanceLoc(*cfi_info, pc);
1210
1211    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
1212    cfi_info->push_back(0x0e);
1213    EncodeUnsignedLeb128(*cfi_info, frame_size_);
1214
1215    // We continue with that stack until the epilogue.
1216    if (stack_increment_ != nullptr) {
1217      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
1218      AdvanceLoc(*cfi_info, new_pc - pc);
1219
1220      // We probably have code snippets after the epilogue, so save the
1221      // current state: DW_CFA_remember_state.
1222      cfi_info->push_back(0x0a);
1223
1224      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
1225      // PC on the stack now.
1226      cfi_info->push_back(0x0e);
1227      EncodeUnsignedLeb128(*cfi_info, 4);
1228
1229      // Everything after that is the same as before the epilogue.
1230      // Stack bump was followed by RET instruction.
1231      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
1232      if (post_ret_insn != nullptr) {
1233        pc = new_pc;
1234        new_pc = post_ret_insn->offset;
1235        AdvanceLoc(*cfi_info, new_pc - pc);
1236        // Restore the state: DW_CFA_restore_state.
1237        cfi_info->push_back(0x0b);
1238      }
1239    }
1240  }
1241
1242  // Padding to a multiple of 4
1243  while ((cfi_info->size() & 3) != 0) {
1244    // DW_CFA_nop is encoded as 0.
1245    cfi_info->push_back(0);
1246  }
1247
1248  // Set the length of the FDE inside the generated bytes.
1249  uint32_t length = cfi_info->size() - 4;
1250  (*cfi_info)[0] = length;
1251  (*cfi_info)[1] = length >> 8;
1252  (*cfi_info)[2] = length >> 16;
1253  (*cfi_info)[3] = length >> 24;
1254  return cfi_info;
1255}
1256
1257void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
1258  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
1259    case kMirOpConstVector:
1260      GenConst128(bb, mir);
1261      break;
1262    default:
1263      break;
1264  }
1265}
1266
1267void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
1268  int type_size = mir->dalvikInsn.vA;
1269  // We support 128 bit vectors.
1270  DCHECK_EQ(type_size & 0xFFFF, 128);
1271  int reg = mir->dalvikInsn.vB;
1272  DCHECK_LT(reg, 8);
1273  uint32_t *args = mir->dalvikInsn.arg;
1274  // Check for all 0 case.
1275  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
1276    NewLIR2(kX86XorpsRR, reg, reg);
1277    return;
1278  }
1279  // Okay, load it from the constant vector area.
1280  LIR *data_target = ScanVectorLiteral(mir);
1281  if (data_target == nullptr) {
1282    data_target = AddVectorLiteral(mir);
1283  }
1284
1285  // Address the start of the method.
1286  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
1287  rl_method = LoadValue(rl_method, kCoreReg);
1288
1289  // Load the proper value from the literal area.
1290  // We don't know the proper offset for the value, so pick one that will force
1291  // a 4-byte offset.  We will fix this up in the assembler later to have the right
1292  // value.
1293  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(),  256 /* bogus */);
1294  load->flags.fixup = kFixupLoad;
1295  load->target = data_target;
1296  SetMemRefType(load, true, kLiteral);
1297}
1298
1299LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
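  // Search the existing constant vector list for a literal whose four 32-bit words match this MIR's arguments.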
1300  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
1301  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
1302    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
1303        args[2] == p->operands[2] && args[3] == p->operands[3]) {
1304      return p;
1305    }
1306  }
1307  return nullptr;
1308}
1309
1310LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
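  // Allocate a new literal node, copy in the four 32-bit words, and prepend it to the constant vector list.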
1311  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
1312  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
1313  new_value->operands[0] = args[0];
1314  new_value->operands[1] = args[1];
1315  new_value->operands[2] = args[2];
1316  new_value->operands[3] = args[3];
1317  new_value->next = const_vectors_;
1318  if (const_vectors_ == nullptr) {
1319    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
1320  }
1321  estimated_native_code_size_ += 16;  // Space for one vector.
1322  const_vectors_ = new_value;
1323  return new_value;
1324}
1325
1326}  // namespace art
1327