target_x86.cc revision a0cd2d701f29e0bc6275f1b13c0edfd4ec391879
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <string>
18#include <inttypes.h>
19
20#include "codegen_x86.h"
21#include "dex/compiler_internals.h"
22#include "dex/quick/mir_to_lir-inl.h"
23#include "mirror/array.h"
24#include "mirror/string.h"
25#include "x86_lir.h"
26
27namespace art {
28
29static const RegStorage core_regs_arr_32[] = {
30    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
31};
32static const RegStorage core_regs_arr_64[] = {
33    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
34#ifdef TARGET_REX_SUPPORT
35    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
36#endif
37};
38static const RegStorage core_regs_arr_64q[] = {
39    rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
40#ifdef TARGET_REX_SUPPORT
41    rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
42#endif
43};
44static const RegStorage sp_regs_arr_32[] = {
45    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
46};
47static const RegStorage sp_regs_arr_64[] = {
48    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
49#ifdef TARGET_REX_SUPPORT
50    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
51#endif
52};
53static const RegStorage dp_regs_arr_32[] = {
54    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
55};
56static const RegStorage dp_regs_arr_64[] = {
57    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
58#ifdef TARGET_REX_SUPPORT
59    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
60#endif
61};
62static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
63static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
64static const RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
65static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
66static const RegStorage core_temps_arr_64[] = {
67    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
68#ifdef TARGET_REX_SUPPORT
69    rs_r8, rs_r9, rs_r10, rs_r11
70#endif
71};
72static const RegStorage core_temps_arr_64q[] = {
73    rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
74#ifdef TARGET_REX_SUPPORT
75    rs_r8q, rs_r9q, rs_r10q, rs_r11q
76#endif
77};
78static const RegStorage sp_temps_arr_32[] = {
79    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
80};
81static const RegStorage sp_temps_arr_64[] = {
82    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
83#ifdef TARGET_REX_SUPPORT
84    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
85#endif
86};
87static const RegStorage dp_temps_arr_32[] = {
88    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
89};
90static const RegStorage dp_temps_arr_64[] = {
91    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
92#ifdef TARGET_REX_SUPPORT
93    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
94#endif
95};
96
97static const RegStorage xp_temps_arr_32[] = {
98    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
99};
100static const RegStorage xp_temps_arr_64[] = {
101    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
102#ifdef TARGET_REX_SUPPORT
103    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
104#endif
105};
106
107static const std::vector<RegStorage> empty_pool;
108static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
109    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
110static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
111    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
112static const std::vector<RegStorage> core_regs_64q(core_regs_arr_64q,
113    core_regs_arr_64q + sizeof(core_regs_arr_64q) / sizeof(core_regs_arr_64q[0]));
114static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
115    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
116static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
117    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
118static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
119    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
120static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
121    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
122static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
123    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
124static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
125    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
126static const std::vector<RegStorage> reserved_regs_64q(reserved_regs_arr_64q,
127    reserved_regs_arr_64q + sizeof(reserved_regs_arr_64q) / sizeof(reserved_regs_arr_64q[0]));
128static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
129    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
130static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
131    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
132static const std::vector<RegStorage> core_temps_64q(core_temps_arr_64q,
133    core_temps_arr_64q + sizeof(core_temps_arr_64q) / sizeof(core_temps_arr_64q[0]));
134static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
135    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
136static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
137    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
138static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
139    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
140static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
141    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
142
143static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32,
144    xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0]));
145static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64,
146    xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0]));
147
148RegStorage rs_rX86_SP;
149
150X86NativeRegisterPool rX86_ARG0;
151X86NativeRegisterPool rX86_ARG1;
152X86NativeRegisterPool rX86_ARG2;
153X86NativeRegisterPool rX86_ARG3;
154X86NativeRegisterPool rX86_FARG0;
155X86NativeRegisterPool rX86_FARG1;
156X86NativeRegisterPool rX86_FARG2;
157X86NativeRegisterPool rX86_FARG3;
158X86NativeRegisterPool rX86_RET0;
159X86NativeRegisterPool rX86_RET1;
160X86NativeRegisterPool rX86_INVOKE_TGT;
161X86NativeRegisterPool rX86_COUNT;
162
163RegStorage rs_rX86_ARG0;
164RegStorage rs_rX86_ARG1;
165RegStorage rs_rX86_ARG2;
166RegStorage rs_rX86_ARG3;
167RegStorage rs_rX86_FARG0;
168RegStorage rs_rX86_FARG1;
169RegStorage rs_rX86_FARG2;
170RegStorage rs_rX86_FARG3;
171RegStorage rs_rX86_RET0;
172RegStorage rs_rX86_RET1;
173RegStorage rs_rX86_INVOKE_TGT;
174RegStorage rs_rX86_COUNT;
175
176RegLocation X86Mir2Lir::LocCReturn() {
177  return x86_loc_c_return;
178}
179
180RegLocation X86Mir2Lir::LocCReturnRef() {
181  // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported.
182  return x86_loc_c_return;
183}
184
185RegLocation X86Mir2Lir::LocCReturnWide() {
186  return x86_loc_c_return_wide;
187}
188
189RegLocation X86Mir2Lir::LocCReturnFloat() {
190  return x86_loc_c_return_float;
191}
192
193RegLocation X86Mir2Lir::LocCReturnDouble() {
194  return x86_loc_c_return_double;
195}
196
197// Return a target-dependent special register.
198RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
199  RegStorage res_reg = RegStorage::InvalidReg();
200  switch (reg) {
201    case kSelf: res_reg = RegStorage::InvalidReg(); break;
202    case kSuspend: res_reg =  RegStorage::InvalidReg(); break;
203    case kLr: res_reg =  RegStorage::InvalidReg(); break;
204    case kPc: res_reg =  RegStorage::InvalidReg(); break;
205    case kSp: res_reg =  rs_rX86_SP; break;
206    case kArg0: res_reg = rs_rX86_ARG0; break;
207    case kArg1: res_reg = rs_rX86_ARG1; break;
208    case kArg2: res_reg = rs_rX86_ARG2; break;
209    case kArg3: res_reg = rs_rX86_ARG3; break;
210    case kFArg0: res_reg = rs_rX86_FARG0; break;
211    case kFArg1: res_reg = rs_rX86_FARG1; break;
212    case kFArg2: res_reg = rs_rX86_FARG2; break;
213    case kFArg3: res_reg = rs_rX86_FARG3; break;
214    case kRet0: res_reg = rs_rX86_RET0; break;
215    case kRet1: res_reg = rs_rX86_RET1; break;
216    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
217    case kHiddenArg: res_reg = rs_rAX; break;
218    case kHiddenFpArg: res_reg = rs_fr0; break;
219    case kCount: res_reg = rs_rX86_COUNT; break;
220  }
221  return res_reg;
222}
223
224RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
225  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
226  // TODO: This is not 64-bit compliant and depends on new internal ABI.
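  // Note: Dalvik arg 0 maps to rX86_ARG1 (not ARG0) because rX86_ARG0 carries the method pointer on entry.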
227  switch (arg_num) {
228    case 0:
229      return rs_rX86_ARG1;
230    case 1:
231      return rs_rX86_ARG2;
232    case 2:
233      return rs_rX86_ARG3;
234    default:
235      return RegStorage::InvalidReg();
236  }
237}
238
239/*
240 * Decode the register id into its single-bit resource mask.
241 */
242uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
243  uint64_t seed;
244  int shift;
245  int reg_id;
246
247  reg_id = reg.GetRegNum();
248  /* Double registers in x86 are just a single FP register */
249  seed = 1;
250  /* FP register starts at bit position 16 */
251  shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0;
252  /* Expand the double register id into single offset */
253  shift += reg_id;
254  return (seed << shift);
255}
256
257uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
258  /*
259   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
260   * able to clean up some of the x86/Arm_Mips differences
261   */
262  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
263  return 0ULL;
264}
265
266void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
267  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
268  DCHECK(!lir->flags.use_def_invalid);
269
270  // X86-specific resource map setup here.
271  if (flags & REG_USE_SP) {
272    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
273  }
274
275  if (flags & REG_DEF_SP) {
276    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
277  }
278
279  if (flags & REG_DEFA) {
280    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
281  }
282
283  if (flags & REG_DEFD) {
284    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
285  }
286  if (flags & REG_USEA) {
287    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
288  }
289
290  if (flags & REG_USEC) {
291    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
292  }
293
294  if (flags & REG_USED) {
295    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
296  }
297
298  if (flags & REG_USEB) {
299    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
300  }
301
302  // Fixup for a hard-to-describe instruction: uses rAX, rCX, rDI; sets rDI.
303  if (lir->opcode == kX86RepneScasw) {
304    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
305    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
306    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
307    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
308  }
309
310  if (flags & USE_FP_STACK) {
311    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
312    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
313  }
314}
315
316/* For dumping instructions */
317static const char* x86RegName[] = {
318  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
319  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
320};
321
322static const char* x86CondName[] = {
323  "O",
324  "NO",
325  "B/NAE/C",
326  "NB/AE/NC",
327  "Z/EQ",
328  "NZ/NE",
329  "BE/NA",
330  "NBE/A",
331  "S",
332  "NS",
333  "P/PE",
334  "NP/PO",
335  "L/NGE",
336  "NL/GE",
337  "LE/NG",
338  "NLE/G"
339};
340
341/*
342 * Interpret a format string and build a string no longer than size.
343 * See format key in Assemble.cc.
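 * For example, "!0r,!1r" would print operands 0 and 1 as register names, and "!1d" would print operand 1 as a decimal immediate.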
344 */
345std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
346  std::string buf;
347  size_t i = 0;
348  size_t fmt_len = strlen(fmt);
349  while (i < fmt_len) {
350    if (fmt[i] != '!') {
351      buf += fmt[i];
352      i++;
353    } else {
354      i++;
355      DCHECK_LT(i, fmt_len);
356      char operand_number_ch = fmt[i];
357      i++;
358      if (operand_number_ch == '!') {
359        buf += "!";
360      } else {
361        int operand_number = operand_number_ch - '0';
362        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
363        DCHECK_LT(i, fmt_len);
364        int operand = lir->operands[operand_number];
365        switch (fmt[i]) {
366          case 'c':
367            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
368            buf += x86CondName[operand];
369            break;
370          case 'd':
371            buf += StringPrintf("%d", operand);
372            break;
373          case 'p': {
374            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
375            buf += StringPrintf("0x%08x", tab_rec->offset);
376            break;
377          }
378          case 'r':
379            if (RegStorage::IsFloat(operand)) {
380              int fp_reg = RegStorage::RegNum(operand);
381              buf += StringPrintf("xmm%d", fp_reg);
382            } else {
383              int reg_num = RegStorage::RegNum(operand);
384              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
385              buf += x86RegName[reg_num];
386            }
387            break;
388          case 't':
389            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
390                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
391                                lir->target);
392            break;
393          default:
394            buf += StringPrintf("DecodeError '%c'", fmt[i]);
395            break;
396        }
397        i++;
398      }
399    }
400  }
401  return buf;
402}
403
404void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
405  char buf[256];
406  buf[0] = 0;
407
408  if (mask == ENCODE_ALL) {
409    strcpy(buf, "all");
410  } else {
411    char num[8];
412    int i;
413
414    for (i = 0; i < kX86RegEnd; i++) {
415      if (mask & (1ULL << i)) {
416        snprintf(num, arraysize(num), "%d ", i);
417        strcat(buf, num);
418      }
419    }
420
421    if (mask & ENCODE_CCODE) {
422      strcat(buf, "cc ");
423    }
424    /* Memory bits */
425    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
426      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
427               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
428               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
429    }
430    if (mask & ENCODE_LITERAL) {
431      strcat(buf, "lit ");
432    }
433
434    if (mask & ENCODE_HEAP_REF) {
435      strcat(buf, "heap ");
436    }
437    if (mask & ENCODE_MUST_NOT_ALIAS) {
438      strcat(buf, "noalias ");
439    }
440  }
441  if (buf[0]) {
442    LOG(INFO) << prefix << ": " <<  buf;
443  }
444}
445
446void X86Mir2Lir::AdjustSpillMask() {
447  // Adjustment for LR spilling: x86 has no LR, but we reserve a spill slot for the fake return address.
448  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
449  num_core_spills_++;
450}
451
452/*
453 * Mark a callee-save fp register as promoted.  Note that
454 * vpush/vpop uses contiguous register lists so we must
455 * include any holes in the mask.  Associate holes with
456 * Dalvik register INVALID_VREG (0xFFFFU).
457 */
458void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
459  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
460}
461
462void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
463  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
464}
465
466RegStorage X86Mir2Lir::AllocateByteRegister() {
467  return AllocTypedTemp(false, kCoreReg);
468}
469
470/* Clobber all regs that might be used by an external C call */
471void X86Mir2Lir::ClobberCallerSave() {
472  Clobber(rs_rAX);
473  Clobber(rs_rCX);
474  Clobber(rs_rDX);
475  Clobber(rs_rBX);
476}
477
478RegLocation X86Mir2Lir::GetReturnWideAlt() {
479  RegLocation res = LocCReturnWide();
480  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
481  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
482  Clobber(rs_rAX);
483  Clobber(rs_rDX);
484  MarkInUse(rs_rAX);
485  MarkInUse(rs_rDX);
486  MarkWide(res.reg);
487  return res;
488}
489
490RegLocation X86Mir2Lir::GetReturnAlt() {
491  RegLocation res = LocCReturn();
492  res.reg.SetReg(rs_rDX.GetReg());
493  Clobber(rs_rDX);
494  MarkInUse(rs_rDX);
495  return res;
496}
497
498/* To be used when explicitly managing register use */
499void X86Mir2Lir::LockCallTemps() {
500  LockTemp(rs_rX86_ARG0);
501  LockTemp(rs_rX86_ARG1);
502  LockTemp(rs_rX86_ARG2);
503  LockTemp(rs_rX86_ARG3);
504}
505
506/* To be used when explicitly managing register use */
507void X86Mir2Lir::FreeCallTemps() {
508  FreeTemp(rs_rX86_ARG0);
509  FreeTemp(rs_rX86_ARG1);
510  FreeTemp(rs_rX86_ARG2);
511  FreeTemp(rs_rX86_ARG3);
512}
513
514bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
515    switch (opcode) {
516      case kX86LockCmpxchgMR:
517      case kX86LockCmpxchgAR:
518      case kX86LockCmpxchg8bM:
519      case kX86LockCmpxchg8bA:
520      case kX86XchgMR:
521      case kX86Mfence:
522        // Atomic memory instructions provide full barrier.
523        return true;
524      default:
525        break;
526    }
527
528    // Be conservative if we cannot prove that the instruction provides a full barrier.
529    return false;
530}
531
532bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
533#if ANDROID_SMP != 0
534  // Start off using the last LIR as the barrier. If it is not enough, then we will update it.
535  LIR* mem_barrier = last_lir_insn_;
536
537  bool ret = false;
538  /*
539   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
540   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
541   * to ensure is that there is a scheduling barrier in place.
542   */
543  if (barrier_kind == kStoreLoad) {
544    // If no LIR already exists that can be used as a barrier, then generate an mfence.
545    if (mem_barrier == nullptr) {
546      mem_barrier = NewLIR0(kX86Mfence);
547      ret = true;
548    }
549
550    // If last instruction does not provide full barrier, then insert an mfence.
551    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
552      mem_barrier = NewLIR0(kX86Mfence);
553      ret = true;
554    }
555  }
556
557  // Now ensure that a scheduling barrier is in place.
558  if (mem_barrier == nullptr) {
559    GenBarrier();
560  } else {
561    // Mark as a scheduling barrier.
562    DCHECK(!mem_barrier->flags.use_def_invalid);
563    mem_barrier->u.m.def_mask = ENCODE_ALL;
564  }
565  return ret;
566#else
567  return false;
568#endif
569}
570
571void X86Mir2Lir::CompilerInitializeRegAlloc() {
572  if (Gen64Bit()) {
573    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, empty_pool/*core_regs_64q*/, sp_regs_64,
574                                          dp_regs_64, reserved_regs_64, empty_pool/*reserved_regs_64q*/,
575                                          core_temps_64, empty_pool/*core_temps_64q*/, sp_temps_64, dp_temps_64);
576  } else {
577    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
578                                          dp_regs_32, reserved_regs_32, empty_pool,
579                                          core_temps_32, empty_pool, sp_temps_32, dp_temps_32);
580  }
581
582  // Target-specific adjustments.
583
584  // Add in XMM registers.
585  const std::vector<RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32;
586  for (RegStorage reg : *xp_temps) {
587    RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
588    reginfo_map_.Put(reg.GetReg(), info);
589    info->SetIsTemp(true);
590  }
591
592  // Alias single precision xmm to double xmms.
593  // TODO: as needed, add larger vector sizes - alias all to the largest.
594  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
595  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
596    int sp_reg_num = info->GetReg().GetRegNum();
597    RegStorage xp_reg = RegStorage::Solo128(sp_reg_num);
598    RegisterInfo* xp_reg_info = GetRegInfo(xp_reg);
599    // 128-bit xmm vector register's master storage should refer to itself.
600    DCHECK_EQ(xp_reg_info, xp_reg_info->Master());
601
602    // Redirect 32-bit vector's master storage to 128-bit vector.
603    info->SetMaster(xp_reg_info);
604
605    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
606    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
607    // Redirect 64-bit vector's master storage to 128-bit vector.
608    dp_reg_info->SetMaster(xp_reg_info);
609  }
610
611  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
612  // TODO: adjust for x86/hard float calling convention.
613  reg_pool_->next_core_reg_ = 2;
614  reg_pool_->next_sp_reg_ = 2;
615  reg_pool_->next_dp_reg_ = 1;
616}
617
618void X86Mir2Lir::SpillCoreRegs() {
619  if (num_core_spills_ == 0) {
620    return;
621  }
622  // Spill mask not including fake return address register
623  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
624  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
625  for (int reg = 0; mask; mask >>= 1, reg++) {
626    if (mask & 0x1) {
627      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
628      offset += GetInstructionSetPointerSize(cu_->instruction_set);
629    }
630  }
631}
632
633void X86Mir2Lir::UnSpillCoreRegs() {
634  if (num_core_spills_ == 0) {
635    return;
636  }
637  // Spill mask not including fake return address register
638  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
639  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
640  for (int reg = 0; mask; mask >>= 1, reg++) {
641    if (mask & 0x1) {
642      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
643      offset += GetInstructionSetPointerSize(cu_->instruction_set);
644    }
645  }
646}
647
648bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
649  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
650}
651
652bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
653  return true;
654}
655
656RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
657  if (UNLIKELY(is_volatile)) {
658    // On x86, atomic 64-bit load/store requires an fp register.
659    // Smaller aligned load/store is atomic for both core and fp registers.
660    if (size == k64 || size == kDouble) {
661      return kFPReg;
662    }
663  }
664  return RegClassBySize(size);
665}
666
667X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
668    : Mir2Lir(cu, mir_graph, arena),
669      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
670      method_address_insns_(arena, 100, kGrowableArrayMisc),
671      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
672      call_method_insns_(arena, 100, kGrowableArrayMisc),
673      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
674      const_vectors_(nullptr) {
675  store_method_addr_used_ = false;
676  if (kIsDebugBuild) {
677    for (int i = 0; i < kX86Last; i++) {
678      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
679        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
680                   << " is wrong: expecting " << i << ", seeing "
681                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
682      }
683    }
684  }
685  if (Gen64Bit()) {
686    rs_rX86_SP = rs_rX86_SP_64;
687
688    rs_rX86_ARG0 = rs_rDI;
689    rs_rX86_ARG1 = rs_rSI;
690    rs_rX86_ARG2 = rs_rDX;
691    rs_rX86_ARG3 = rs_rCX;
692    rX86_ARG0 = rDI;
693    rX86_ARG1 = rSI;
694    rX86_ARG2 = rDX;
695    rX86_ARG3 = rCX;
696    // TODO: ARG4(r8), ARG5(r9), floating point args.
697  } else {
698    rs_rX86_SP = rs_rX86_SP_32;
699
700    rs_rX86_ARG0 = rs_rAX;
701    rs_rX86_ARG1 = rs_rCX;
702    rs_rX86_ARG2 = rs_rDX;
703    rs_rX86_ARG3 = rs_rBX;
704    rX86_ARG0 = rAX;
705    rX86_ARG1 = rCX;
706    rX86_ARG2 = rDX;
707    rX86_ARG3 = rBX;
708  }
709  rs_rX86_FARG0 = rs_rAX;
710  rs_rX86_FARG1 = rs_rCX;
711  rs_rX86_FARG2 = rs_rDX;
712  rs_rX86_FARG3 = rs_rBX;
713  rs_rX86_RET0 = rs_rAX;
714  rs_rX86_RET1 = rs_rDX;
715  rs_rX86_INVOKE_TGT = rs_rAX;
716  rs_rX86_COUNT = rs_rCX;
717  rX86_FARG0 = rAX;
718  rX86_FARG1 = rCX;
719  rX86_FARG2 = rDX;
720  rX86_FARG3 = rBX;
721  rX86_RET0 = rAX;
722  rX86_RET1 = rDX;
723  rX86_INVOKE_TGT = rAX;
724  rX86_COUNT = rCX;
725}
726
727Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
728                          ArenaAllocator* const arena) {
729  return new X86Mir2Lir(cu, mir_graph, arena, false);
730}
731
732Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
733                          ArenaAllocator* const arena) {
734  return new X86Mir2Lir(cu, mir_graph, arena, true);
735}
736
737// Not used in x86
738RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
739  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
740  return RegStorage::InvalidReg();
741}
742
743// Not used in x86
744RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
745  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
746  return RegStorage::InvalidReg();
747}
748
749LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
750  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
751  return nullptr;
752}
753
754uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
755  DCHECK(!IsPseudoLirOp(opcode));
756  return X86Mir2Lir::EncodingMap[opcode].flags;
757}
758
759const char* X86Mir2Lir::GetTargetInstName(int opcode) {
760  DCHECK(!IsPseudoLirOp(opcode));
761  return X86Mir2Lir::EncodingMap[opcode].name;
762}
763
764const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
765  DCHECK(!IsPseudoLirOp(opcode));
766  return X86Mir2Lir::EncodingMap[opcode].fmt;
767}
768
769void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
770  // Can we do this directly to memory?
771  rl_dest = UpdateLocWide(rl_dest);
772  if ((rl_dest.location == kLocDalvikFrame) ||
773      (rl_dest.location == kLocCompilerTemp)) {
774    int32_t val_lo = Low32Bits(value);
775    int32_t val_hi = High32Bits(value);
776    int r_base = TargetReg(kSp).GetReg();
777    int displacement = SRegOffset(rl_dest.s_reg_low);
778
779    LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
780    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
781                              false /* is_load */, true /* is64bit */);
782    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
783    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
784                              false /* is_load */, true /* is64bit */);
785    return;
786  }
787
788  // Just use the standard code to do the generation.
789  Mir2Lir::GenConstWide(rl_dest, value);
790}
791
792// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
793void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
794  LOG(INFO)  << "location: " << loc.location << ','
795             << (loc.wide ? " w" : "  ")
796             << (loc.defined ? " D" : "  ")
797             << (loc.is_const ? " c" : "  ")
798             << (loc.fp ? " F" : "  ")
799             << (loc.core ? " C" : "  ")
800             << (loc.ref ? " r" : "  ")
801             << (loc.high_word ? " h" : "  ")
802             << (loc.home ? " H" : "  ")
803             << ", low: " << static_cast<int>(loc.reg.GetLowReg())
804             << ", high: " << static_cast<int>(loc.reg.GetHighReg())
805             << ", s_reg: " << loc.s_reg_low
806             << ", orig: " << loc.orig_sreg;
807}
808
809void X86Mir2Lir::Materialize() {
810  // A good place to put the analysis before starting.
811  AnalyzeMIR();
812
813  // Now continue with regular code generation.
814  Mir2Lir::Materialize();
815}
816
817void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
818                                   SpecialTargetRegister symbolic_reg) {
819  /*
820   * For x86, just generate a 32 bit move immediate instruction that will be filled
821   * in at 'link time'.  For now, put a unique value based on target to ensure that
822   * code deduplication works.
823   */
824  int target_method_idx = target_method.dex_method_index;
825  const DexFile* target_dex_file = target_method.dex_file;
826  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
827  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
828
829  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
830  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
831                     static_cast<int>(target_method_id_ptr), target_method_idx,
832                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
833  AppendLIR(move);
834  method_address_insns_.Insert(move);
835}
836
837void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
838  /*
839   * For x86, just generate a 32 bit move immediate instruction that will be filled
840   * in at 'link time'.  For now, put a unique value based on target to ensure that
841   * code deduplication works.
842   */
843  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
844  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
845
846  // Generate the move instruction with the unique pointer and save index and type.
847  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
848                     static_cast<int>(ptr), type_idx);
849  AppendLIR(move);
850  class_type_address_insns_.Insert(move);
851}
852
853LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
854  /*
855   * For x86, just generate a 32 bit call relative instruction that will be filled
856   * in at 'link time'.  For now, put a unique value based on target to ensure that
857   * code deduplication works.
858   */
859  int target_method_idx = target_method.dex_method_index;
860  const DexFile* target_dex_file = target_method.dex_file;
861  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
862  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
863
864  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
865  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
866                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
867  AppendLIR(call);
868  call_method_insns_.Insert(call);
869  return call;
870}
871
872/*
873 * @brief Enter a 32 bit quantity into a buffer
874 * @param buf buffer.
875 * @param data Data value.
876 */
877
878static void PushWord(std::vector<uint8_t>&buf, int32_t data) {
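  // Emit the value least-significant byte first (little-endian).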
879  buf.push_back(data & 0xff);
880  buf.push_back((data >> 8) & 0xff);
881  buf.push_back((data >> 16) & 0xff);
882  buf.push_back((data >> 24) & 0xff);
883}
884
885void X86Mir2Lir::InstallLiteralPools() {
886  // These are handled differently for x86.
887  DCHECK(code_literal_list_ == nullptr);
888  DCHECK(method_literal_list_ == nullptr);
889  DCHECK(class_literal_list_ == nullptr);
890
891  // Align to 16 byte boundary.  We have implicit knowledge that the start of the method is
892  // on a 4 byte boundary.   How can I check this if it changes (other than aligned loads
893  // will fail at runtime)?
894  if (const_vectors_ != nullptr) {
895    int align_size = (16-4) - (code_buffer_.size() & 0xF);
896    if (align_size < 0) {
897      align_size += 16;
898    }
899
900    while (align_size > 0) {
901      code_buffer_.push_back(0);
902      align_size--;
903    }
904    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
905      PushWord(code_buffer_, p->operands[0]);
906      PushWord(code_buffer_, p->operands[1]);
907      PushWord(code_buffer_, p->operands[2]);
908      PushWord(code_buffer_, p->operands[3]);
909    }
910  }
911
912  // Handle the fixups for methods.
913  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
914      LIR* p = method_address_insns_.Get(i);
915      DCHECK_EQ(p->opcode, kX86Mov32RI);
916      uint32_t target_method_idx = p->operands[2];
917      const DexFile* target_dex_file =
918          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
919
920      // The offset to patch is the last 4 bytes of the instruction.
921      int patch_offset = p->offset + p->flags.size - 4;
922      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
923                                           cu_->method_idx, cu_->invoke_type,
924                                           target_method_idx, target_dex_file,
925                                           static_cast<InvokeType>(p->operands[4]),
926                                           patch_offset);
927  }
928
929  // Handle the fixups for class types.
930  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
931      LIR* p = class_type_address_insns_.Get(i);
932      DCHECK_EQ(p->opcode, kX86Mov32RI);
933      uint32_t target_method_idx = p->operands[2];
934
935      // The offset to patch is the last 4 bytes of the instruction.
936      int patch_offset = p->offset + p->flags.size - 4;
937      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
938                                          cu_->method_idx, target_method_idx, patch_offset);
939  }
940
941  // And now the PC-relative calls to methods.
942  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
943      LIR* p = call_method_insns_.Get(i);
944      DCHECK_EQ(p->opcode, kX86CallI);
945      uint32_t target_method_idx = p->operands[1];
946      const DexFile* target_dex_file =
947          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
948
949      // The offset to patch is the last 4 bytes of the instruction.
950      int patch_offset = p->offset + p->flags.size - 4;
951      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
952                                                 cu_->method_idx, cu_->invoke_type,
953                                                 target_method_idx, target_dex_file,
954                                                 static_cast<InvokeType>(p->operands[3]),
955                                                 patch_offset, -4 /* offset */);
956  }
957
958  // And do the normal processing.
959  Mir2Lir::InstallLiteralPools();
960}
961
962/*
963 * Fast String.indexOf(I) & (II).  Inline check for the simple case of char <= 0xffff;
964 * otherwise bails out to the standard library implementation.
965 */
966bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
967  ClobberCallerSave();
968  LockCallTemps();  // Using fixed registers
969
970  // EAX: 16 bit character being searched.
971  // ECX: count: number of words to be searched.
972  // EDI: String being searched.
973  // EDX: temporary during execution.
974  // EBX: temporary during execution.
975
976  RegLocation rl_obj = info->args[0];
977  RegLocation rl_char = info->args[1];
978  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.
979
980  uint32_t char_value =
981    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
982
983  if (char_value > 0xFFFF) {
984    // We have to punt to the real String.indexOf.
985    return false;
986  }
987
988  // Okay, we are committed to inlining this.
989  RegLocation rl_return = GetReturn(kCoreReg);
990  RegLocation rl_dest = InlineTarget(info);
991
992  // Is the string non-NULL?
993  LoadValueDirectFixed(rl_obj, rs_rDX);
994  GenNullCheck(rs_rDX, info->opt_flags);
995  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
996
997  // Does the character fit in 16 bits?
998  LIR* slowpath_branch = nullptr;
999  if (rl_char.is_const) {
1000    // We need the value in EAX.
1001    LoadConstantNoClobber(rs_rAX, char_value);
1002  } else {
1003    // Character is not a constant; compare at runtime.
1004    LoadValueDirectFixed(rl_char, rs_rAX);
1005    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
1006  }
1007
1008  // From here down, we know that we are looking for a char that fits in 16 bits.
1009  // Location of reference to data array within the String object.
1010  int value_offset = mirror::String::ValueOffset().Int32Value();
1011  // Location of count within the String object.
1012  int count_offset = mirror::String::CountOffset().Int32Value();
1013  // Starting offset within data array.
1014  int offset_offset = mirror::String::OffsetOffset().Int32Value();
1015  // Start of char data within array_.
1016  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
1017
1018  // Character is in EAX.
1019  // Object pointer is in EDX.
1020
1021  // We need to preserve EDI, but have no spare registers, so push it on the stack.
1022  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
1023  NewLIR1(kX86Push32R, rs_rDI.GetReg());
1024
1025  // Compute the number of words to search in to rCX.
1026  Load32Disp(rs_rDX, count_offset, rs_rCX);
1027  LIR *length_compare = nullptr;
1028  int start_value = 0;
1029  bool is_index_on_stack = false;
1030  if (zero_based) {
1031    // We have to handle an empty string.  Use special instruction JECXZ.
1032    length_compare = NewLIR0(kX86Jecxz8);
1033  } else {
1034    rl_start = info->args[2];
1035    // We have to offset by the start index.
1036    if (rl_start.is_const) {
1037      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
1038      start_value = std::max(start_value, 0);
1039
1040      // Is the start > count?
1041      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);
1042
1043      if (start_value != 0) {
1044        OpRegImm(kOpSub, rs_rCX, start_value);
1045      }
1046    } else {
1047      // Runtime start index.
1048      rl_start = UpdateLocTyped(rl_start, kCoreReg);
1049      if (rl_start.location == kLocPhysReg) {
1050        // Handle "start index < 0" case.
1051        OpRegReg(kOpXor, rs_rBX, rs_rBX);
1052        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
1053        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);
1054
1055        // The length of the string should be greater than the start index.
1056        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
1057        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
1058        if (rl_start.reg == rs_rDI) {
1059          // The special case: we will use EDI further, so let's put the start index on the stack.
1060          NewLIR1(kX86Push32R, rs_rDI.GetReg());
1061          is_index_on_stack = true;
1062        }
1063      } else {
1064        // Load the start index from stack, remembering that we pushed EDI.
1065        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
1066        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
1067        OpRegReg(kOpXor, rs_rDI, rs_rDI);
1068        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
1069        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
1070
1071        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
1072        OpRegReg(kOpSub, rs_rCX, rs_rBX);
1073        // Put the start index on the stack.
1074        NewLIR1(kX86Push32R, rs_rBX.GetReg());
1075        is_index_on_stack = true;
1076      }
1077    }
1078  }
1079  DCHECK(length_compare != nullptr);
1080
1081  // ECX now contains the count in words to be searched.
1082
1083  // Load the address of the string into EBX.
1084  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
1085  Load32Disp(rs_rDX, value_offset, rs_rDI);
1086  Load32Disp(rs_rDX, offset_offset, rs_rBX);
1087  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);
1088
1089  // Now compute into EDI where the search will start.
1090  if (zero_based || rl_start.is_const) {
1091    if (start_value == 0) {
1092      OpRegCopy(rs_rDI, rs_rBX);
1093    } else {
1094      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
1095    }
1096  } else {
1097    if (is_index_on_stack) {
1098      // Load the start index from stack.
1099      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
1100      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
1101    } else {
1102      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
1103    }
1104  }
1105
1106  // EDI now contains the start of the string to be searched.
1107  // We are all prepared to do the search for the character.
1108  NewLIR0(kX86RepneScasw);
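  // repne scasw compares AX against each word at [EDI], advancing EDI by 2 and decrementing ECX.
  // On a match EDI is left one char past the hit, hence the "- 1" in the index computation below.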
1109
1110  // Did we find a match?
1111  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
1112
1113  // yes, we matched.  Compute the index of the result.
1114  // index = ((curr_ptr - orig_ptr) / 2) - 1.
1115  OpRegReg(kOpSub, rs_rDI, rs_rBX);
1116  OpRegImm(kOpAsr, rs_rDI, 1);
1117  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
1118  LIR *all_done = NewLIR1(kX86Jmp8, 0);
1119
1120  // Failed to match; return -1.
1121  LIR *not_found = NewLIR0(kPseudoTargetLabel);
1122  length_compare->target = not_found;
1123  failed_branch->target = not_found;
1124  LoadConstantNoClobber(rl_return.reg, -1);
1125
1126  // And join up at the end.
1127  all_done->target = NewLIR0(kPseudoTargetLabel);
1128  // Restore EDI from the stack.
1129  NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1130
1131  // Out of line code returns here.
1132  if (slowpath_branch != nullptr) {
1133    LIR *return_point = NewLIR0(kPseudoTargetLabel);
1134    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
1135  }
1136
1137  StoreValue(rl_dest, rl_return);
1138  return true;
1139}
1140
1141/*
1142 * @brief Enter an 'advance LOC' into the FDE buffer
1143 * @param buf FDE buffer.
1144 * @param increment Amount by which to increase the current location.
1145 */
1146static void AdvanceLoc(std::vector<uint8_t>&buf, uint32_t increment) {
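  // DWARF call-frame "advance location" encodings: a delta below 64 is folded into the
  // DW_CFA_advance_loc opcode byte itself; larger deltas use DW_CFA_advance_loc1/2/4
  // followed by a 1-, 2- or 4-byte operand.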
1147  if (increment < 64) {
1148    // Encoding in opcode.
1149    buf.push_back(0x1 << 6 | increment);
1150  } else if (increment < 256) {
1151    // Single byte delta.
1152    buf.push_back(0x02);
1153    buf.push_back(increment);
1154  } else if (increment < 256 * 256) {
1155    // Two byte delta.
1156    buf.push_back(0x03);
1157    buf.push_back(increment & 0xff);
1158    buf.push_back((increment >> 8) & 0xff);
1159  } else {
1160    // Four byte delta.
1161    buf.push_back(0x04);
1162    PushWord(buf, increment);
1163  }
1164}
1165
1166
1167std::vector<uint8_t>* X86CFIInitialization() {
1168  return X86Mir2Lir::ReturnCommonCallFrameInformation();
1169}
1170
1171std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
1172  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
1173
1174  // Length of the CIE (except for this field).
1175  PushWord(*cfi_info, 16);
1176
1177  // CIE id.
1178  PushWord(*cfi_info, 0xFFFFFFFFU);
1179
1180  // Version: 3.
1181  cfi_info->push_back(0x03);
1182
1183  // Augmentation: empty string.
1184  cfi_info->push_back(0x0);
1185
1186  // Code alignment: 1.
1187  cfi_info->push_back(0x01);
1188
1189  // Data alignment: -4.
1190  cfi_info->push_back(0x7C);
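  // (-4 encoded as a single signed LEB128 byte.)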
1191
1192  // Return address register (R8).
1193  cfi_info->push_back(0x08);
1194
1195  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
1196  cfi_info->push_back(0x0C);
1197  cfi_info->push_back(0x04);
1198  cfi_info->push_back(0x04);
1199
1200  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4);.
1201  cfi_info->push_back(0x2 << 6 | 0x08);
1202  cfi_info->push_back(0x01);
1203
1204  // And 2 Noops to align to 4 byte boundary.
1205  cfi_info->push_back(0x0);
1206  cfi_info->push_back(0x0);
1207
1208  DCHECK_EQ(cfi_info->size() & 3, 0U);
1209  return cfi_info;
1210}
1211
1212static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
1213  uint8_t buffer[12];
1214  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
1215  for (uint8_t *p = buffer; p < ptr; p++) {
1216    buf.push_back(*p);
1217  }
1218}
1219
1220std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
1221  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
1222
1223  // Generate the FDE for the method.
1224  DCHECK_NE(data_offset_, 0U);
1225
1226  // Length (will be filled in later in this routine).
1227  PushWord(*cfi_info, 0);
1228
1229  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
1230  // one CIE for the whole debug_frame section.
1231  PushWord(*cfi_info, 0);
1232
1233  // 'initial_location' (filled in by linker).
1234  PushWord(*cfi_info, 0);
1235
1236  // 'address_range' (number of bytes in the method).
1237  PushWord(*cfi_info, data_offset_);
1238
1239  // The instructions in the FDE.
1240  if (stack_decrement_ != nullptr) {
1241    // Advance LOC to just past the stack decrement.
1242    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
1243    AdvanceLoc(*cfi_info, pc);
1244
1245    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
1246    cfi_info->push_back(0x0e);
1247    EncodeUnsignedLeb128(*cfi_info, frame_size_);
1248
1249    // We continue with that stack until the epilogue.
1250    if (stack_increment_ != nullptr) {
1251      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
1252      AdvanceLoc(*cfi_info, new_pc - pc);
1253
1254      // We probably have code snippets after the epilogue, so save the
1255      // current state: DW_CFA_remember_state.
1256      cfi_info->push_back(0x0a);
1257
1258      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
1259      // PC on the stack now.
1260      cfi_info->push_back(0x0e);
1261      EncodeUnsignedLeb128(*cfi_info, 4);
1262
1263      // Everything after that is the same as before the epilogue.
1264      // Stack bump was followed by RET instruction.
1265      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
1266      if (post_ret_insn != nullptr) {
1267        pc = new_pc;
1268        new_pc = post_ret_insn->offset;
1269        AdvanceLoc(*cfi_info, new_pc - pc);
1270        // Restore the state: DW_CFA_restore_state.
1271        cfi_info->push_back(0x0b);
1272      }
1273    }
1274  }
1275
1276  // Padding to a multiple of 4
1277  while ((cfi_info->size() & 3) != 0) {
1278    // DW_CFA_nop is encoded as 0.
1279    cfi_info->push_back(0);
1280  }
1281
1282  // Set the length of the FDE inside the generated bytes.
1283  uint32_t length = cfi_info->size() - 4;
1284  (*cfi_info)[0] = length;
1285  (*cfi_info)[1] = length >> 8;
1286  (*cfi_info)[2] = length >> 16;
1287  (*cfi_info)[3] = length >> 24;
1288  return cfi_info;
1289}
1290
1291void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
1292  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
1293    case kMirOpConstVector:
1294      GenConst128(bb, mir);
1295      break;
1296    case kMirOpMoveVector:
1297      GenMoveVector(bb, mir);
1298      break;
1299    case kMirOpPackedMultiply:
1300      GenMultiplyVector(bb, mir);
1301      break;
1302    case kMirOpPackedAddition:
1303      GenAddVector(bb, mir);
1304      break;
1305    case kMirOpPackedSubtract:
1306      GenSubtractVector(bb, mir);
1307      break;
1308    case kMirOpPackedShiftLeft:
1309      GenShiftLeftVector(bb, mir);
1310      break;
1311    case kMirOpPackedSignedShiftRight:
1312      GenSignedShiftRightVector(bb, mir);
1313      break;
1314    case kMirOpPackedUnsignedShiftRight:
1315      GenUnsignedShiftRightVector(bb, mir);
1316      break;
1317    case kMirOpPackedAnd:
1318      GenAndVector(bb, mir);
1319      break;
1320    case kMirOpPackedOr:
1321      GenOrVector(bb, mir);
1322      break;
1323    case kMirOpPackedXor:
1324      GenXorVector(bb, mir);
1325      break;
1326    case kMirOpPackedAddReduce:
1327      GenAddReduceVector(bb, mir);
1328      break;
1329    case kMirOpPackedReduce:
1330      GenReduceVector(bb, mir);
1331      break;
1332    case kMirOpPackedSet:
1333      GenSetVector(bb, mir);
1334      break;
1335    default:
1336      break;
1337  }
1338}
1339
1340void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
1341  int type_size = mir->dalvikInsn.vA;
1342  // We support 128 bit vectors.
1343  DCHECK_EQ(type_size & 0xFFFF, 128);
1344  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
1345  uint32_t *args = mir->dalvikInsn.arg;
1346  int reg = rs_dest.GetReg();
1347  // Check for all 0 case.
1348  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
1349    NewLIR2(kX86XorpsRR, reg, reg);
1350    return;
1351  }
1352  // Okay, load it from the constant vector area.
1353  LIR *data_target = ScanVectorLiteral(mir);
1354  if (data_target == nullptr) {
1355    data_target = AddVectorLiteral(mir);
1356  }
1357
1358  // Address the start of the method.
1359  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
1360  rl_method = LoadValue(rl_method, kCoreReg);
1361
1362  // Load the proper value from the literal area.
1363  // We don't know the proper offset for the value, so pick one that will force
1364  // 4 byte offset.  We will fix this up in the assembler later to have the right
1365  // value.
1366  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(),  256 /* bogus */);
1367  load->flags.fixup = kFixupLoad;
1368  load->target = data_target;
1369  SetMemRefType(load, true, kLiteral);
1370}
1371
1372void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
1373  // We only support 128 bit registers.
1374  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1375  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
1376  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vC);
1377  NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg());
1378}
1379
1380void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
1381  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1382  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1383  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1384  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
1385  int opcode = 0;
1386  switch (opsize) {
1387    case k32:
1388      opcode = kX86PmulldRR;
1389      break;
1390    case kSignedHalf:
1391      opcode = kX86PmullwRR;
1392      break;
1393    case kSingle:
1394      opcode = kX86MulpsRR;
1395      break;
1396    case kDouble:
1397      opcode = kX86MulpdRR;
1398      break;
1399    default:
1400      LOG(FATAL) << "Unsupported vector multiply " << opsize;
1401      break;
1402  }
1403  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
1404}
1405
1406void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) {
1407  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1408  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1409  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1410  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
1411  int opcode = 0;
1412  switch (opsize) {
1413    case k32:
1414      opcode = kX86PadddRR;
1415      break;
1416    case kSignedHalf:
1417    case kUnsignedHalf:
1418      opcode = kX86PaddwRR;
1419      break;
1420    case kUnsignedByte:
1421    case kSignedByte:
1422      opcode = kX86PaddbRR;
1423      break;
1424    case kSingle:
1425      opcode = kX86AddpsRR;
1426      break;
1427    case kDouble:
1428      opcode = kX86AddpdRR;
1429      break;
1430    default:
1431      LOG(FATAL) << "Unsupported vector addition " << opsize;
1432      break;
1433  }
1434  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
1435}
1436
1437void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) {
1438  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1439  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1440  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1441  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
1442  int opcode = 0;
1443  switch (opsize) {
1444    case k32:
1445      opcode = kX86PsubdRR;
1446      break;
1447    case kSignedHalf:
1448    case kUnsignedHalf:
1449      opcode = kX86PsubwRR;
1450      break;
1451    case kUnsignedByte:
1452    case kSignedByte:
1453      opcode = kX86PsubbRR;
1454      break;
1455    case kSingle:
1456      opcode = kX86SubpsRR;
1457      break;
1458    case kDouble:
1459      opcode = kX86SubpdRR;
1460      break;
1461    default:
1462      LOG(FATAL) << "Unsupported vector subtraction " << opsize;
1463      break;
1464  }
1465  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
1466}
1467
1468void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
1469  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1470  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1471  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1472  int imm = mir->dalvikInsn.vC;
1473  int opcode = 0;
1474  switch (opsize) {
1475    case k32:
1476      opcode = kX86PslldRI;
1477      break;
1478    case k64:
1479      opcode = kX86PsllqRI;
1480      break;
1481    case kSignedHalf:
1482    case kUnsignedHalf:
1483      opcode = kX86PsllwRI;
1484      break;
1485    default:
1486      LOG(FATAL) << "Unsupported vector shift left " << opsize;
1487      break;
1488  }
1489  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1490}
1491
1492void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
1493  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1494  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1495  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1496  int imm = mir->dalvikInsn.vC;
1497  int opcode = 0;
1498  switch (opsize) {
1499    case k32:
1500      opcode = kX86PsradRI;
1501      break;
1502    case kSignedHalf:
1503    case kUnsignedHalf:
1504      opcode = kX86PsrawRI;
1505      break;
1506    default:
1507      LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
1508      break;
1509  }
1510  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1511}
1512
1513void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
1514  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1515  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1516  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1517  int imm = mir->dalvikInsn.vC;
1518  int opcode = 0;
1519  switch (opsize) {
1520    case k32:
1521      opcode = kX86PsrldRI;
1522      break;
1523    case k64:
1524      opcode = kX86PsrlqRI;
1525      break;
1526    case kSignedHalf:
1527    case kUnsignedHalf:
1528      opcode = kX86PsrlwRI;
1529      break;
1530    default:
1531      LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
1532      break;
1533  }
1534  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1535}
1536
1537void X86Mir2Lir::GenAndVector(BasicBlock *bb, MIR *mir) {
1538  // We only support 128 bit registers.
1539  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1540  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1541  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
1542  NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
1543}
1544
1545void X86Mir2Lir::GenOrVector(BasicBlock *bb, MIR *mir) {
1546  // We only support 128 bit registers.
1547  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1548  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1549  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
1550  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
1551}
1552
1553void X86Mir2Lir::GenXorVector(BasicBlock *bb, MIR *mir) {
1554  // We only support 128 bit registers.
1555  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1556  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1557  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
1558  NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
1559}
1560
1561void X86Mir2Lir::GenAddReduceVector(BasicBlock *bb, MIR *mir) {
1562  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1563  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1564  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
1565  int imm = mir->dalvikInsn.vC;
1566  int opcode = 0;
1567  switch (opsize) {
1568    case k32:
1569      opcode = kX86PhadddRR;
1570      break;
1571    case kSignedHalf:
1572    case kUnsignedHalf:
1573      opcode = kX86PhaddwRR;
1574      break;
1575    default:
1576      LOG(FATAL) << "Unsupported vector add reduce " << opsize;
1577      break;
1578  }
1579  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1580}
1581
1582void X86Mir2Lir::GenReduceVector(BasicBlock *bb, MIR *mir) {
1583  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1584  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1585  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
1586  int index = mir->dalvikInsn.arg[0];
1587  int opcode = 0;
1588  switch (opsize) {
1589    case k32:
1590      opcode = kX86PextrdRRI;
1591      break;
1592    case kSignedHalf:
1593    case kUnsignedHalf:
1594      opcode = kX86PextrwRRI;
1595      break;
1596    case kUnsignedByte:
1597    case kSignedByte:
1598      opcode = kX86PextrbRRI;
1599      break;
1600    default:
1601      LOG(FATAL) << "Unsupported vector reduce " << opsize;
1602      break;
1603  }
1604  // We need to extract to a GPR.
1605  RegStorage temp = AllocTemp();
1606  NewLIR3(opcode, temp.GetReg(), rs_src.GetReg(), index);
1607
1608  // Assume that the destination VR is in the def for the mir.
1609  RegLocation rl_dest = mir_graph_->GetDest(mir);
1610  RegLocation rl_temp =
1611    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, temp, INVALID_SREG, INVALID_SREG};
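  // 'temp' now holds the extracted scalar; wrap it in a core-register RegLocation so
  // StoreValue can write it to the destination VR.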
1612  StoreValue(rl_dest, rl_temp);
1613}
1614
1615void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
1616  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
1617  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
1618  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
1619  int op_low = 0, op_high = 0;
1620  switch (opsize) {
1621    case k32:
1622      op_low = kX86PshufdRRI;
1623      break;
1624    case kSignedHalf:
1625    case kUnsignedHalf:
1626      // Handles low quadword.
1627      op_low = kX86PshuflwRRI;
1628      // Handles upper quadword.
1629      op_high = kX86PshufdRRI;
1630      break;
1631    default:
1632      LOG(FATAL) << "Unsupported vector set " << opsize;
1633      break;
1634  }
1635
1636  // Load the value from the VR into a GPR.
1637  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
1638  rl_src = LoadValue(rl_src, kCoreReg);
1639
1640  // Load the value into the XMM register.
1641  NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg());
1642
1643  // Now shuffle the value across the destination.
1644  NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0);
1645
1646  // And then repeat as needed.
1647  if (op_high != 0) {
1648    NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), 0);
1649  }
1650}
1651
1652
1653LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
1654  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
1655  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
1656    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
1657        args[2] == p->operands[2] && args[3] == p->operands[3]) {
1658      return p;
1659    }
1660  }
1661  return nullptr;
1662}
1663
1664LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
1665  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
1666  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
1667  new_value->operands[0] = args[0];
1668  new_value->operands[1] = args[1];
1669  new_value->operands[2] = args[2];
1670  new_value->operands[3] = args[3];
1671  new_value->next = const_vectors_;
1672  if (const_vectors_ == nullptr) {
1673    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
1674  }
1675  estimated_native_code_size_ += 16;  // Space for one vector.
1676  const_vectors_ = new_value;
1677  return new_value;
1678}
1679
1680}  // namespace art
1681