target_x86.cc revision 7fb36ded9cd5b1d254b63b3091f35c1e6471b90e
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <string>
18#include <inttypes.h>
19
20#include "codegen_x86.h"
21#include "dex/compiler_internals.h"
22#include "dex/quick/mir_to_lir-inl.h"
23#include "dex/reg_storage_eq.h"
24#include "mirror/array.h"
25#include "mirror/string.h"
26#include "x86_lir.h"
27
28namespace art {
29
30static constexpr RegStorage core_regs_arr_32[] = {
31    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
32};
33static constexpr RegStorage core_regs_arr_64[] = {
34    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
35    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
36};
37static constexpr RegStorage core_regs_arr_64q[] = {
38    rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
39    rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
40};
41static constexpr RegStorage sp_regs_arr_32[] = {
42    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
43};
44static constexpr RegStorage sp_regs_arr_64[] = {
45    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
46    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
47};
48static constexpr RegStorage dp_regs_arr_32[] = {
49    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
50};
51static constexpr RegStorage dp_regs_arr_64[] = {
52    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
53    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
54};
55static constexpr RegStorage xp_regs_arr_32[] = {
56    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
57};
58static constexpr RegStorage xp_regs_arr_64[] = {
59    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
60    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
61};
62static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
63static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
64static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
65static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
66static constexpr RegStorage core_temps_arr_64[] = {
67    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
68    rs_r8, rs_r9, rs_r10, rs_r11
69};
70
71// How to make a register available for promotion:
72// 1) Remove the register from the array defining the temps
73// 2) Update ClobberCallerSave
74// 3) Update the JNI compiler ABI:
75// 3.1) add the reg in the JniCallingConvention method
76// 3.2) update CoreSpillMask/FpSpillMask
77// 4) Update the entrypoints
78// 4.1) Update constants in asm_support_x86_64.h for the new frame size
79// 4.2) Remove the entry in SmashCallerSaves
80// 4.3) Update jni_entrypoints to spill/unspill the new callee-save reg
81// 4.4) Update quick_entrypoints to spill/unspill the new callee-save reg
82// 5) Update the runtime ABI
83// 5.1) Update quick_method_frame_info with the new required spills
84// 5.2) Update QuickArgumentVisitor with the new offsets to GPRs and XMMs
85// Note that you cannot use a register that corresponds to an incoming argument
86// under the calling convention, and the QCG needs one additional XMM temp for
87// the bulk copy in preparation for the call (an illustrative example follows below).
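// Illustrative example (hypothetical register choice): to make r11 promotable, you would delete
// rs_r11/rs_r11q from the temp arrays here, stop clobbering it in ClobberCallerSave(), and then
// carry it as a callee-save register through the JNI, entrypoint and runtime ABI steps above.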
88static constexpr RegStorage core_temps_arr_64q[] = {
89    rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
90    rs_r8q, rs_r9q, rs_r10q, rs_r11q
91};
92static constexpr RegStorage sp_temps_arr_32[] = {
93    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
94};
95static constexpr RegStorage sp_temps_arr_64[] = {
96    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
97    rs_fr8, rs_fr9, rs_fr10, rs_fr11
98};
99static constexpr RegStorage dp_temps_arr_32[] = {
100    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
101};
102static constexpr RegStorage dp_temps_arr_64[] = {
103    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
104    rs_dr8, rs_dr9, rs_dr10, rs_dr11
105};
106
107static constexpr RegStorage xp_temps_arr_32[] = {
108    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
109};
110static constexpr RegStorage xp_temps_arr_64[] = {
111    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
112    rs_xr8, rs_xr9, rs_xr10, rs_xr11
113};
114
115static constexpr ArrayRef<const RegStorage> empty_pool;
116static constexpr ArrayRef<const RegStorage> core_regs_32(core_regs_arr_32);
117static constexpr ArrayRef<const RegStorage> core_regs_64(core_regs_arr_64);
118static constexpr ArrayRef<const RegStorage> core_regs_64q(core_regs_arr_64q);
119static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32);
120static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
121static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
122static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
123static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32);
124static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64);
125static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
126static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
127static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
128static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32);
129static constexpr ArrayRef<const RegStorage> core_temps_64(core_temps_arr_64);
130static constexpr ArrayRef<const RegStorage> core_temps_64q(core_temps_arr_64q);
131static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32);
132static constexpr ArrayRef<const RegStorage> sp_temps_64(sp_temps_arr_64);
133static constexpr ArrayRef<const RegStorage> dp_temps_32(dp_temps_arr_32);
134static constexpr ArrayRef<const RegStorage> dp_temps_64(dp_temps_arr_64);
135
136static constexpr ArrayRef<const RegStorage> xp_temps_32(xp_temps_arr_32);
137static constexpr ArrayRef<const RegStorage> xp_temps_64(xp_temps_arr_64);
138
139RegStorage rs_rX86_SP;
140
141X86NativeRegisterPool rX86_ARG0;
142X86NativeRegisterPool rX86_ARG1;
143X86NativeRegisterPool rX86_ARG2;
144X86NativeRegisterPool rX86_ARG3;
145X86NativeRegisterPool rX86_ARG4;
146X86NativeRegisterPool rX86_ARG5;
147X86NativeRegisterPool rX86_FARG0;
148X86NativeRegisterPool rX86_FARG1;
149X86NativeRegisterPool rX86_FARG2;
150X86NativeRegisterPool rX86_FARG3;
151X86NativeRegisterPool rX86_FARG4;
152X86NativeRegisterPool rX86_FARG5;
153X86NativeRegisterPool rX86_FARG6;
154X86NativeRegisterPool rX86_FARG7;
155X86NativeRegisterPool rX86_RET0;
156X86NativeRegisterPool rX86_RET1;
157X86NativeRegisterPool rX86_INVOKE_TGT;
158X86NativeRegisterPool rX86_COUNT;
159
160RegStorage rs_rX86_ARG0;
161RegStorage rs_rX86_ARG1;
162RegStorage rs_rX86_ARG2;
163RegStorage rs_rX86_ARG3;
164RegStorage rs_rX86_ARG4;
165RegStorage rs_rX86_ARG5;
166RegStorage rs_rX86_FARG0;
167RegStorage rs_rX86_FARG1;
168RegStorage rs_rX86_FARG2;
169RegStorage rs_rX86_FARG3;
170RegStorage rs_rX86_FARG4;
171RegStorage rs_rX86_FARG5;
172RegStorage rs_rX86_FARG6;
173RegStorage rs_rX86_FARG7;
174RegStorage rs_rX86_RET0;
175RegStorage rs_rX86_RET1;
176RegStorage rs_rX86_INVOKE_TGT;
177RegStorage rs_rX86_COUNT;
178
179RegLocation X86Mir2Lir::LocCReturn() {
180  return x86_loc_c_return;
181}
182
183RegLocation X86Mir2Lir::LocCReturnRef() {
184  return cu_->target64 ? x86_64_loc_c_return_ref : x86_loc_c_return_ref;
185}
186
187RegLocation X86Mir2Lir::LocCReturnWide() {
188  return cu_->target64 ? x86_64_loc_c_return_wide : x86_loc_c_return_wide;
189}
190
191RegLocation X86Mir2Lir::LocCReturnFloat() {
192  return x86_loc_c_return_float;
193}
194
195RegLocation X86Mir2Lir::LocCReturnDouble() {
196  return x86_loc_c_return_double;
197}
198
199// Return a target-dependent special register for 32-bit.
200RegStorage X86Mir2Lir::TargetReg32(SpecialTargetRegister reg) {
201  RegStorage res_reg = RegStorage::InvalidReg();
202  switch (reg) {
203    case kSelf: res_reg = RegStorage::InvalidReg(); break;
204    case kSuspend: res_reg =  RegStorage::InvalidReg(); break;
205    case kLr: res_reg =  RegStorage::InvalidReg(); break;
206    case kPc: res_reg =  RegStorage::InvalidReg(); break;
207    case kSp: res_reg =  rs_rX86_SP; break;
208    case kArg0: res_reg = rs_rX86_ARG0; break;
209    case kArg1: res_reg = rs_rX86_ARG1; break;
210    case kArg2: res_reg = rs_rX86_ARG2; break;
211    case kArg3: res_reg = rs_rX86_ARG3; break;
212    case kArg4: res_reg = rs_rX86_ARG4; break;
213    case kArg5: res_reg = rs_rX86_ARG5; break;
214    case kFArg0: res_reg = rs_rX86_FARG0; break;
215    case kFArg1: res_reg = rs_rX86_FARG1; break;
216    case kFArg2: res_reg = rs_rX86_FARG2; break;
217    case kFArg3: res_reg = rs_rX86_FARG3; break;
218    case kFArg4: res_reg = rs_rX86_FARG4; break;
219    case kFArg5: res_reg = rs_rX86_FARG5; break;
220    case kFArg6: res_reg = rs_rX86_FARG6; break;
221    case kFArg7: res_reg = rs_rX86_FARG7; break;
222    case kRet0: res_reg = rs_rX86_RET0; break;
223    case kRet1: res_reg = rs_rX86_RET1; break;
224    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
225    case kHiddenArg: res_reg = rs_rAX; break;
226    case kHiddenFpArg: DCHECK(!cu_->target64); res_reg = rs_fr0; break;
227    case kCount: res_reg = rs_rX86_COUNT; break;
228    default: res_reg = RegStorage::InvalidReg();
229  }
230  return res_reg;
231}
232
233RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
234  LOG(FATAL) << "Do not use this function!!!";
235  return RegStorage::InvalidReg();
236}
237
238/*
239 * Decode the register id.
240 */
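// For example (illustrative): core register number 2 maps to resource bit 2, while a float or
// vector register with the same number maps to bit kX86FPReg0 + 2, so core and FP registers
// never share a bit in the mask.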
241ResourceMask X86Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
242  /* Double registers in x86 are just a single FP register. This is always just a single bit. */
243  return ResourceMask::Bit(
244      /* FP register starts at bit position 16 */
245      ((reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0) + reg.GetRegNum());
246}
247
248ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const {
249  /*
250   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
251   * able to clean up some of the x86/Arm_Mips differences
252   */
253  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
254  return kEncodeNone;
255}
256
257void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
258                                          ResourceMask* use_mask, ResourceMask* def_mask) {
259  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
260  DCHECK(!lir->flags.use_def_invalid);
261
262  // X86-specific resource map setup here.
263  if (flags & REG_USE_SP) {
264    use_mask->SetBit(kX86RegSP);
265  }
266
267  if (flags & REG_DEF_SP) {
268    def_mask->SetBit(kX86RegSP);
269  }
270
271  if (flags & REG_DEFA) {
272    SetupRegMask(def_mask, rs_rAX.GetReg());
273  }
274
275  if (flags & REG_DEFD) {
276    SetupRegMask(def_mask, rs_rDX.GetReg());
277  }
278  if (flags & REG_USEA) {
279    SetupRegMask(use_mask, rs_rAX.GetReg());
280  }
281
282  if (flags & REG_USEC) {
283    SetupRegMask(use_mask, rs_rCX.GetReg());
284  }
285
286  if (flags & REG_USED) {
287    SetupRegMask(use_mask, rs_rDX.GetReg());
288  }
289
290  if (flags & REG_USEB) {
291    SetupRegMask(use_mask, rs_rBX.GetReg());
292  }
293
294  // Fix up a hard-to-describe instruction: kX86RepneScasw uses rAX, rCX, and rDI, and sets rDI.
295  if (lir->opcode == kX86RepneScasw) {
296    SetupRegMask(use_mask, rs_rAX.GetReg());
297    SetupRegMask(use_mask, rs_rCX.GetReg());
298    SetupRegMask(use_mask, rs_rDI.GetReg());
299    SetupRegMask(def_mask, rs_rDI.GetReg());
300  }
301
302  if (flags & USE_FP_STACK) {
303    use_mask->SetBit(kX86FPStack);
304    def_mask->SetBit(kX86FPStack);
305  }
306}
307
308/* For dumping instructions */
309static const char* x86RegName[] = {
310  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
311  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
312};
313
314static const char* x86CondName[] = {
315  "O",
316  "NO",
317  "B/NAE/C",
318  "NB/AE/NC",
319  "Z/EQ",
320  "NZ/NE",
321  "BE/NA",
322  "NBE/A",
323  "S",
324  "NS",
325  "P/PE",
326  "NP/PO",
327  "L/NGE",
328  "NL/GE",
329  "LE/NG",
330  "NLE/G"
331};
332
333/*
334 * Interpret a format string and build the decoded instruction string.
335 * See format key in Assemble.cc.
336 */
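// Illustrative example (hypothetical operands): the format "!0c !1d" with operands {5, 42}
// expands to "NZ/NE 42", and a literal "!!" in the format emits a single '!'.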
337std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
338  std::string buf;
339  size_t i = 0;
340  size_t fmt_len = strlen(fmt);
341  while (i < fmt_len) {
342    if (fmt[i] != '!') {
343      buf += fmt[i];
344      i++;
345    } else {
346      i++;
347      DCHECK_LT(i, fmt_len);
348      char operand_number_ch = fmt[i];
349      i++;
350      if (operand_number_ch == '!') {
351        buf += "!";
352      } else {
353        int operand_number = operand_number_ch - '0';
354        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
355        DCHECK_LT(i, fmt_len);
356        int operand = lir->operands[operand_number];
357        switch (fmt[i]) {
358          case 'c':
359            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
360            buf += x86CondName[operand];
361            break;
362          case 'd':
363            buf += StringPrintf("%d", operand);
364            break;
365          case 'q': {
366             int64_t value = static_cast<int64_t>(static_cast<int64_t>(operand) << 32 |
367                             static_cast<uint32_t>(lir->operands[operand_number+1]));
368             buf += StringPrintf("%" PRId64, value);
             break;
369          }
370          case 'p': {
371            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
372            buf += StringPrintf("0x%08x", tab_rec->offset);
373            break;
374          }
375          case 'r':
376            if (RegStorage::IsFloat(operand)) {
377              int fp_reg = RegStorage::RegNum(operand);
378              buf += StringPrintf("xmm%d", fp_reg);
379            } else {
380              int reg_num = RegStorage::RegNum(operand);
381              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
382              buf += x86RegName[reg_num];
383            }
384            break;
385          case 't':
386            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
387                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
388                                lir->target);
389            break;
390          default:
391            buf += StringPrintf("DecodeError '%c'", fmt[i]);
392            break;
393        }
394        i++;
395      }
396    }
397  }
398  return buf;
399}
400
401void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, const ResourceMask& mask, const char *prefix) {
402  char buf[256];
403  buf[0] = 0;
404
405  if (mask.Equals(kEncodeAll)) {
406    strcpy(buf, "all");
407  } else {
408    char num[8];
409    int i;
410
411    for (i = 0; i < kX86RegEnd; i++) {
412      if (mask.HasBit(i)) {
413        snprintf(num, arraysize(num), "%d ", i);
414        strcat(buf, num);
415      }
416    }
417
418    if (mask.HasBit(ResourceMask::kCCode)) {
419      strcat(buf, "cc ");
420    }
421    /* Memory bits */
422    if (x86LIR && (mask.HasBit(ResourceMask::kDalvikReg))) {
423      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
424               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
425               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
426    }
427    if (mask.HasBit(ResourceMask::kLiteral)) {
428      strcat(buf, "lit ");
429    }
430
431    if (mask.HasBit(ResourceMask::kHeapRef)) {
432      strcat(buf, "heap ");
433    }
434    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
435      strcat(buf, "noalias ");
436    }
437  }
438  if (buf[0]) {
439    LOG(INFO) << prefix << ": " <<  buf;
440  }
441}
442
443void X86Mir2Lir::AdjustSpillMask() {
444  // x86 has no LR to spill, but account for the fake return-address register in the spill mask.
445  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
446  num_core_spills_++;
447}
448
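// Only the low four GPRs (AL, CL, DL, BL) have 8-bit forms on 32-bit x86, which is why the
// 32-bit checks below compare the register number against ESP's register number (4); with a
// REX prefix, x86-64 can byte-address any GPR.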
449RegStorage X86Mir2Lir::AllocateByteRegister() {
450  RegStorage reg = AllocTypedTemp(false, kCoreReg);
451  if (!cu_->target64) {
452    DCHECK_LT(reg.GetRegNum(), rs_rX86_SP.GetRegNum());
453  }
454  return reg;
455}
456
457RegStorage X86Mir2Lir::Get128BitRegister(RegStorage reg) {
458  return GetRegInfo(reg)->FindMatchingView(RegisterInfo::k128SoloStorageMask)->GetReg();
459}
460
461bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
462  return cu_->target64 || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
463}
464
465/* Clobber all regs that might be used by an external C call */
466void X86Mir2Lir::ClobberCallerSave() {
467  if (cu_->target64) {
468    Clobber(rs_rAX);
469    Clobber(rs_rCX);
470    Clobber(rs_rDX);
471    Clobber(rs_rSI);
472    Clobber(rs_rDI);
473
474    Clobber(rs_r8);
475    Clobber(rs_r9);
476    Clobber(rs_r10);
477    Clobber(rs_r11);
478
479    Clobber(rs_fr8);
480    Clobber(rs_fr9);
481    Clobber(rs_fr10);
482    Clobber(rs_fr11);
483  } else {
484    Clobber(rs_rAX);
485    Clobber(rs_rCX);
486    Clobber(rs_rDX);
487    Clobber(rs_rBX);
488  }
489
490  Clobber(rs_fr0);
491  Clobber(rs_fr1);
492  Clobber(rs_fr2);
493  Clobber(rs_fr3);
494  Clobber(rs_fr4);
495  Clobber(rs_fr5);
496  Clobber(rs_fr6);
497  Clobber(rs_fr7);
498}
499
500RegLocation X86Mir2Lir::GetReturnWideAlt() {
501  RegLocation res = LocCReturnWide();
502  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
503  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
504  Clobber(rs_rAX);
505  Clobber(rs_rDX);
506  MarkInUse(rs_rAX);
507  MarkInUse(rs_rDX);
508  MarkWide(res.reg);
509  return res;
510}
511
512RegLocation X86Mir2Lir::GetReturnAlt() {
513  RegLocation res = LocCReturn();
514  res.reg.SetReg(rs_rDX.GetReg());
515  Clobber(rs_rDX);
516  MarkInUse(rs_rDX);
517  return res;
518}
519
520/* To be used when explicitly managing register use */
521void X86Mir2Lir::LockCallTemps() {
522  LockTemp(rs_rX86_ARG0);
523  LockTemp(rs_rX86_ARG1);
524  LockTemp(rs_rX86_ARG2);
525  LockTemp(rs_rX86_ARG3);
526  if (cu_->target64) {
527    LockTemp(rs_rX86_ARG4);
528    LockTemp(rs_rX86_ARG5);
529    LockTemp(rs_rX86_FARG0);
530    LockTemp(rs_rX86_FARG1);
531    LockTemp(rs_rX86_FARG2);
532    LockTemp(rs_rX86_FARG3);
533    LockTemp(rs_rX86_FARG4);
534    LockTemp(rs_rX86_FARG5);
535    LockTemp(rs_rX86_FARG6);
536    LockTemp(rs_rX86_FARG7);
537  }
538}
539
540/* To be used when explicitly managing register use */
541void X86Mir2Lir::FreeCallTemps() {
542  FreeTemp(rs_rX86_ARG0);
543  FreeTemp(rs_rX86_ARG1);
544  FreeTemp(rs_rX86_ARG2);
545  FreeTemp(rs_rX86_ARG3);
546  if (cu_->target64) {
547    FreeTemp(rs_rX86_ARG4);
548    FreeTemp(rs_rX86_ARG5);
549    FreeTemp(rs_rX86_FARG0);
550    FreeTemp(rs_rX86_FARG1);
551    FreeTemp(rs_rX86_FARG2);
552    FreeTemp(rs_rX86_FARG3);
553    FreeTemp(rs_rX86_FARG4);
554    FreeTemp(rs_rX86_FARG5);
555    FreeTemp(rs_rX86_FARG6);
556    FreeTemp(rs_rX86_FARG7);
557  }
558}
559
560bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
561    switch (opcode) {
562      case kX86LockCmpxchgMR:
563      case kX86LockCmpxchgAR:
564      case kX86LockCmpxchg64M:
565      case kX86LockCmpxchg64A:
566      case kX86XchgMR:
567      case kX86Mfence:
568        // Atomic memory instructions provide full barrier.
569        return true;
570      default:
571        break;
572    }
573
574    // Conservative if cannot prove it provides full barrier.
575    return false;
576}
577
578bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
579#if ANDROID_SMP != 0
580  // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
581  LIR* mem_barrier = last_lir_insn_;
582
583  bool ret = false;
584  /*
585   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
586   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
587   * to ensure is that there is a scheduling barrier in place.
588   */
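  // For example, if the last LIR is a kX86LockCmpxchgMR, a requested kStoreLoad barrier needs
  // no extra mfence here: the locked instruction already acts as a full barrier (see
  // ProvidesFullMemoryBarrier above).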
589  if (barrier_kind == kStoreLoad) {
590    // If no LIR already exists that can be used as a barrier, then generate an mfence.
591    if (mem_barrier == nullptr) {
592      mem_barrier = NewLIR0(kX86Mfence);
593      ret = true;
594    }
595
596    // If last instruction does not provide full barrier, then insert an mfence.
597    if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
598      mem_barrier = NewLIR0(kX86Mfence);
599      ret = true;
600    }
601  }
602
603  // Now ensure that a scheduling barrier is in place.
604  if (mem_barrier == nullptr) {
605    GenBarrier();
606  } else {
607    // Mark as a scheduling barrier.
608    DCHECK(!mem_barrier->flags.use_def_invalid);
609    mem_barrier->u.m.def_mask = &kEncodeAll;
610  }
611  return ret;
612#else
613  return false;
614#endif
615}
616
617void X86Mir2Lir::CompilerInitializeRegAlloc() {
618  if (cu_->target64) {
619    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64,
620                                          dp_regs_64, reserved_regs_64, reserved_regs_64q,
621                                          core_temps_64, core_temps_64q, sp_temps_64, dp_temps_64);
622  } else {
623    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
624                                          dp_regs_32, reserved_regs_32, empty_pool,
625                                          core_temps_32, empty_pool, sp_temps_32, dp_temps_32);
626  }
627
628  // Target-specific adjustments.
629
630  // Add in XMM registers.
631  const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32;
632  for (RegStorage reg : *xp_regs) {
633    RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
634    reginfo_map_.Put(reg.GetReg(), info);
635  }
636  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
637  for (RegStorage reg : *xp_temps) {
638    RegisterInfo* xp_reg_info = GetRegInfo(reg);
639    xp_reg_info->SetIsTemp(true);
640  }
641
642  // Alias single precision xmm to double xmms.
643  // TODO: as needed, add larger vector sizes - alias all to the largest.
644  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
645  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
646    int sp_reg_num = info->GetReg().GetRegNum();
647    RegStorage xp_reg = RegStorage::Solo128(sp_reg_num);
648    RegisterInfo* xp_reg_info = GetRegInfo(xp_reg);
649    // 128-bit xmm vector register's master storage should refer to itself.
650    DCHECK_EQ(xp_reg_info, xp_reg_info->Master());
651
652    // Redirect 32-bit vector's master storage to 128-bit vector.
653    info->SetMaster(xp_reg_info);
654
655    RegStorage dp_reg = RegStorage::FloatSolo64(sp_reg_num);
656    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
657    // Redirect 64-bit vector's master storage to 128-bit vector.
658    dp_reg_info->SetMaster(xp_reg_info);
659    // Singles should show a single 32-bit mask bit, at first referring to the low half.
660    DCHECK_EQ(info->StorageMask(), 0x1U);
661  }
662
663  if (cu_->target64) {
664    // Alias 32bit W registers to corresponding 64bit X registers.
665    GrowableArray<RegisterInfo*>::Iterator w_it(&reg_pool_->core_regs_);
666    for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) {
667      int x_reg_num = info->GetReg().GetRegNum();
668      RegStorage x_reg = RegStorage::Solo64(x_reg_num);
669      RegisterInfo* x_reg_info = GetRegInfo(x_reg);
670      // 64bit X register's master storage should refer to itself.
671      DCHECK_EQ(x_reg_info, x_reg_info->Master());
672      // Redirect 32bit W master storage to 64bit X.
673      info->SetMaster(x_reg_info);
674      // 32bit W should show a single 32-bit mask bit, at first referring to the low half.
675      DCHECK_EQ(info->StorageMask(), 0x1U);
676    }
677  }
678
679  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
680  // TODO: adjust for x86/hard float calling convention.
681  reg_pool_->next_core_reg_ = 2;
682  reg_pool_->next_sp_reg_ = 2;
683  reg_pool_->next_dp_reg_ = 1;
684}
685
686int X86Mir2Lir::VectorRegisterSize() {
687  return 128;
688}
689
690int X86Mir2Lir::NumReservableVectorRegisters(bool fp_used) {
691  return fp_used ? 5 : 7;
692}
693
694void X86Mir2Lir::SpillCoreRegs() {
695  if (num_core_spills_ == 0) {
696    return;
697  }
698  // Spill mask not including fake return address register
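  // Core spills are stored at the top of the frame, just below the pushed return address; e.g.
  // (illustrative numbers) with a 64-byte x86-32 frame and num_core_spills_ == 2 (one real spill
  // plus the fake return-address slot), the spilled register is written at ESP+56.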
699  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
700  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
701  OpSize size = cu_->target64 ? k64 : k32;
702  for (int reg = 0; mask; mask >>= 1, reg++) {
703    if (mask & 0x1) {
704      StoreBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) :  RegStorage::Solo32(reg),
705                   size, kNotVolatile);
706      offset += GetInstructionSetPointerSize(cu_->instruction_set);
707    }
708  }
709}
710
711void X86Mir2Lir::UnSpillCoreRegs() {
712  if (num_core_spills_ == 0) {
713    return;
714  }
715  // Spill mask not including fake return address register
716  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
717  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
718  OpSize size = cu_->target64 ? k64 : k32;
719  for (int reg = 0; mask; mask >>= 1, reg++) {
720    if (mask & 0x1) {
721      LoadBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) :  RegStorage::Solo32(reg),
722                   size, kNotVolatile);
723      offset += GetInstructionSetPointerSize(cu_->instruction_set);
724    }
725  }
726}
727
728void X86Mir2Lir::SpillFPRegs() {
729  if (num_fp_spills_ == 0) {
730    return;
731  }
732  uint32_t mask = fp_spill_mask_;
733  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
734  for (int reg = 0; mask; mask >>= 1, reg++) {
735    if (mask & 0x1) {
736      StoreBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
737                   k64, kNotVolatile);
738      offset += sizeof(double);
739    }
740  }
741}
742void X86Mir2Lir::UnSpillFPRegs() {
743  if (num_fp_spills_ == 0) {
744    return;
745  }
746  uint32_t mask = fp_spill_mask_;
747  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
748  for (int reg = 0; mask; mask >>= 1, reg++) {
749    if (mask & 0x1) {
750      LoadBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
751                   k64, kNotVolatile);
752      offset += sizeof(double);
753    }
754  }
755}
756
757
758bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
759  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
760}
761
762bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
763  return true;
764}
765
766RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
767  // X86_64 can handle any size.
768  if (cu_->target64) {
769    if (size == kReference) {
770      return kRefReg;
771    }
772    return kCoreReg;
773  }
774
775  if (UNLIKELY(is_volatile)) {
776    // On x86, atomic 64-bit load/store requires an fp register.
777    // Smaller aligned load/store is atomic for both core and fp registers.
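    // For example, a volatile 64-bit field access on 32-bit x86 is routed through an XMM
    // register (movsd/movq) so the 8-byte memory access is performed by a single instruction,
    // which x86 treats as atomic for aligned 64-bit accesses.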
778    if (size == k64 || size == kDouble) {
779      return kFPReg;
780    }
781  }
782  return RegClassBySize(size);
783}
784
785X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
786    : Mir2Lir(cu, mir_graph, arena),
787      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
788      method_address_insns_(arena, 100, kGrowableArrayMisc),
789      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
790      call_method_insns_(arena, 100, kGrowableArrayMisc),
791      stack_decrement_(nullptr), stack_increment_(nullptr),
792      const_vectors_(nullptr) {
793  store_method_addr_used_ = false;
794  if (kIsDebugBuild) {
795    for (int i = 0; i < kX86Last; i++) {
796      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
797        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
798                   << " is wrong: expecting " << i << ", seeing "
799                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
800      }
801    }
802  }
803  if (cu_->target64) {
804    rs_rX86_SP = rs_rX86_SP_64;
805
806    rs_rX86_ARG0 = rs_rDI;
807    rs_rX86_ARG1 = rs_rSI;
808    rs_rX86_ARG2 = rs_rDX;
809    rs_rX86_ARG3 = rs_rCX;
810    rs_rX86_ARG4 = rs_r8;
811    rs_rX86_ARG5 = rs_r9;
812    rs_rX86_FARG0 = rs_fr0;
813    rs_rX86_FARG1 = rs_fr1;
814    rs_rX86_FARG2 = rs_fr2;
815    rs_rX86_FARG3 = rs_fr3;
816    rs_rX86_FARG4 = rs_fr4;
817    rs_rX86_FARG5 = rs_fr5;
818    rs_rX86_FARG6 = rs_fr6;
819    rs_rX86_FARG7 = rs_fr7;
820    rX86_ARG0 = rDI;
821    rX86_ARG1 = rSI;
822    rX86_ARG2 = rDX;
823    rX86_ARG3 = rCX;
824    rX86_ARG4 = r8;
825    rX86_ARG5 = r9;
826    rX86_FARG0 = fr0;
827    rX86_FARG1 = fr1;
828    rX86_FARG2 = fr2;
829    rX86_FARG3 = fr3;
830    rX86_FARG4 = fr4;
831    rX86_FARG5 = fr5;
832    rX86_FARG6 = fr6;
833    rX86_FARG7 = fr7;
834    rs_rX86_INVOKE_TGT = rs_rDI;
835  } else {
836    rs_rX86_SP = rs_rX86_SP_32;
837
838    rs_rX86_ARG0 = rs_rAX;
839    rs_rX86_ARG1 = rs_rCX;
840    rs_rX86_ARG2 = rs_rDX;
841    rs_rX86_ARG3 = rs_rBX;
842    rs_rX86_ARG4 = RegStorage::InvalidReg();
843    rs_rX86_ARG5 = RegStorage::InvalidReg();
844    rs_rX86_FARG0 = rs_rAX;
845    rs_rX86_FARG1 = rs_rCX;
846    rs_rX86_FARG2 = rs_rDX;
847    rs_rX86_FARG3 = rs_rBX;
848    rs_rX86_FARG4 = RegStorage::InvalidReg();
849    rs_rX86_FARG5 = RegStorage::InvalidReg();
850    rs_rX86_FARG6 = RegStorage::InvalidReg();
851    rs_rX86_FARG7 = RegStorage::InvalidReg();
852    rX86_ARG0 = rAX;
853    rX86_ARG1 = rCX;
854    rX86_ARG2 = rDX;
855    rX86_ARG3 = rBX;
856    rX86_FARG0 = rAX;
857    rX86_FARG1 = rCX;
858    rX86_FARG2 = rDX;
859    rX86_FARG3 = rBX;
860    rs_rX86_INVOKE_TGT = rs_rAX;
861    // TODO(64): Initialize with invalid reg
862//    rX86_ARG4 = RegStorage::InvalidReg();
863//    rX86_ARG5 = RegStorage::InvalidReg();
864  }
865  rs_rX86_RET0 = rs_rAX;
866  rs_rX86_RET1 = rs_rDX;
867  rs_rX86_COUNT = rs_rCX;
868  rX86_RET0 = rAX;
869  rX86_RET1 = rDX;
870  rX86_INVOKE_TGT = rAX;
871  rX86_COUNT = rCX;
872
873  // Initialize the number of reserved vector registers
874  num_reserved_vector_regs_ = -1;
875}
876
877Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
878                          ArenaAllocator* const arena) {
879  return new X86Mir2Lir(cu, mir_graph, arena);
880}
881
882// Not used in x86
883RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
884  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
885  return RegStorage::InvalidReg();
886}
887
888// Not used in x86
889RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
890  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
891  return RegStorage::InvalidReg();
892}
893
894LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
895  // First load the pointer in fs:[suspend-trigger] into eax
896  // Then use a test instruction to indirect via that address.
897  NewLIR2(kX86Mov32RT, rs_rAX.GetReg(),   cu_->target64 ?
898      Thread::ThreadSuspendTriggerOffset<8>().Int32Value() :
899      Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
900  return NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rAX.GetReg(), 0);
901}
902
903uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
904  DCHECK(!IsPseudoLirOp(opcode));
905  return X86Mir2Lir::EncodingMap[opcode].flags;
906}
907
908const char* X86Mir2Lir::GetTargetInstName(int opcode) {
909  DCHECK(!IsPseudoLirOp(opcode));
910  return X86Mir2Lir::EncodingMap[opcode].name;
911}
912
913const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
914  DCHECK(!IsPseudoLirOp(opcode));
915  return X86Mir2Lir::EncodingMap[opcode].fmt;
916}
917
918void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
919  // Can we do this directly to memory?
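  // Illustrative example: a const-wide of 0x0000000100000001 whose destination lives in the
  // Dalvik frame becomes two 32-bit immediate stores, to [ESP + disp + LOWORD_OFFSET] and
  // [ESP + disp + HIWORD_OFFSET], without allocating a register for the value.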
920  rl_dest = UpdateLocWide(rl_dest);
921  if ((rl_dest.location == kLocDalvikFrame) ||
922      (rl_dest.location == kLocCompilerTemp)) {
923    int32_t val_lo = Low32Bits(value);
924    int32_t val_hi = High32Bits(value);
925    int r_base = rs_rX86_SP.GetReg();
926    int displacement = SRegOffset(rl_dest.s_reg_low);
927
928    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
929    LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
930    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
931                              false /* is_load */, true /* is64bit */);
932    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
933    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
934                              false /* is_load */, true /* is64bit */);
935    return;
936  }
937
938  // Just use the standard code to do the generation.
939  Mir2Lir::GenConstWide(rl_dest, value);
940}
941
942// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
943void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
944  LOG(INFO)  << "location: " << loc.location << ','
945             << (loc.wide ? " w" : "  ")
946             << (loc.defined ? " D" : "  ")
947             << (loc.is_const ? " c" : "  ")
948             << (loc.fp ? " F" : "  ")
949             << (loc.core ? " C" : "  ")
950             << (loc.ref ? " r" : "  ")
951             << (loc.high_word ? " h" : "  ")
952             << (loc.home ? " H" : "  ")
953             << ", low: " << static_cast<int>(loc.reg.GetLowReg())
954             << ", high: " << static_cast<int>(loc.reg.GetHighReg())
955             << ", s_reg: " << loc.s_reg_low
956             << ", orig: " << loc.orig_sreg;
957}
958
959void X86Mir2Lir::Materialize() {
960  // A good place to put the analysis before starting.
961  AnalyzeMIR();
962
963  // Now continue with regular code generation.
964  Mir2Lir::Materialize();
965}
966
967void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
968                                   SpecialTargetRegister symbolic_reg) {
969  /*
970   * For x86, just generate a 32 bit move immediate instruction, that will be filled
971   * in at 'link time'.  For now, put a unique value based on target to ensure that
972   * code deduplication works.
973   */
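  // The immediate written below is only a placeholder; InstallLiteralPools() later records a
  // patch covering the last four bytes of this mov so the real address is filled in at link time.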
974  int target_method_idx = target_method.dex_method_index;
975  const DexFile* target_dex_file = target_method.dex_file;
976  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
977  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
978
979  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
980  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg, false).GetReg(),
981                     static_cast<int>(target_method_id_ptr), target_method_idx,
982                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
983  AppendLIR(move);
984  method_address_insns_.Insert(move);
985}
986
987void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
988  /*
989   * For x86, just generate a 32 bit move immediate instruction, that will be filled
990   * in at 'link time'.  For now, put a unique value based on target to ensure that
991   * code deduplication works.
992   */
993  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
994  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
995
996  // Generate the move instruction with the unique pointer and save index and type.
997  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg, false).GetReg(),
998                     static_cast<int>(ptr), type_idx);
999  AppendLIR(move);
1000  class_type_address_insns_.Insert(move);
1001}
1002
1003LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
1004  /*
1005   * For x86, just generate a 32 bit call relative instruction, that will be filled
1006   * in at 'link time'.  For now, put a unique value based on target to ensure that
1007   * code deduplication works.
1008   */
1009  int target_method_idx = target_method.dex_method_index;
1010  const DexFile* target_dex_file = target_method.dex_file;
1011  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
1012  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
1013
1014  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
1015  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
1016                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
1017  AppendLIR(call);
1018  call_method_insns_.Insert(call);
1019  return call;
1020}
1021
1022/*
1023 * @brief Enter a 32 bit quantity into a buffer
1024 * @param buf buffer.
1025 * @param data Data value.
1026 */
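// For example (little-endian), PushWord(buf, 0x12345678) appends the bytes 0x78, 0x56, 0x34, 0x12.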
1027
1028static void PushWord(std::vector<uint8_t>& buf, int32_t data) {
1029  buf.push_back(data & 0xff);
1030  buf.push_back((data >> 8) & 0xff);
1031  buf.push_back((data >> 16) & 0xff);
1032  buf.push_back((data >> 24) & 0xff);
1033}
1034
1035void X86Mir2Lir::InstallLiteralPools() {
1036  // These are handled differently for x86.
1037  DCHECK(code_literal_list_ == nullptr);
1038  DCHECK(method_literal_list_ == nullptr);
1039  DCHECK(class_literal_list_ == nullptr);
1040
1041  // Align to 16 byte boundary.  We have implicit knowledge that the start of the method is
1042  // on a 4 byte boundary.   How can I check this if it changes (other than aligned loads
1043  // will fail at runtime)?
1044  if (const_vectors_ != nullptr) {
1045    int align_size = (16-4) - (code_buffer_.size() & 0xF);
1046    if (align_size < 0) {
1047      align_size += 16;
1048    }
1049
1050    while (align_size > 0) {
1051      code_buffer_.push_back(0);
1052      align_size--;
1053    }
1054    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
1055      PushWord(code_buffer_, p->operands[0]);
1056      PushWord(code_buffer_, p->operands[1]);
1057      PushWord(code_buffer_, p->operands[2]);
1058      PushWord(code_buffer_, p->operands[3]);
1059    }
1060  }
1061
1062  // Handle the fixups for methods.
1063  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
1064      LIR* p = method_address_insns_.Get(i);
1065      DCHECK_EQ(p->opcode, kX86Mov32RI);
1066      uint32_t target_method_idx = p->operands[2];
1067      const DexFile* target_dex_file =
1068          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
1069
1070      // The offset to patch is the last 4 bytes of the instruction.
1071      int patch_offset = p->offset + p->flags.size - 4;
1072      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
1073                                           cu_->method_idx, cu_->invoke_type,
1074                                           target_method_idx, target_dex_file,
1075                                           static_cast<InvokeType>(p->operands[4]),
1076                                           patch_offset);
1077  }
1078
1079  // Handle the fixups for class types.
1080  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
1081      LIR* p = class_type_address_insns_.Get(i);
1082      DCHECK_EQ(p->opcode, kX86Mov32RI);
1083      uint32_t target_method_idx = p->operands[2];
1084
1085      // The offset to patch is the last 4 bytes of the instruction.
1086      int patch_offset = p->offset + p->flags.size - 4;
1087      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
1088                                          cu_->method_idx, target_method_idx, patch_offset);
1089  }
1090
1091  // And now the PC-relative calls to methods.
1092  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
1093      LIR* p = call_method_insns_.Get(i);
1094      DCHECK_EQ(p->opcode, kX86CallI);
1095      uint32_t target_method_idx = p->operands[1];
1096      const DexFile* target_dex_file =
1097          reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
1098
1099      // The offset to patch is the last 4 bytes of the instruction.
1100      int patch_offset = p->offset + p->flags.size - 4;
1101      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
1102                                                 cu_->method_idx, cu_->invoke_type,
1103                                                 target_method_idx, target_dex_file,
1104                                                 static_cast<InvokeType>(p->operands[3]),
1105                                                 patch_offset, -4 /* offset */);
1106  }
1107
1108  // And do the normal processing.
1109  Mir2Lir::InstallLiteralPools();
1110}
1111
1112bool X86Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
1113  if (cu_->target64) {
1114    // TODO: Implement ArrayCopy intrinsic for x86_64
1115    return false;
1116  }
1117
1118  RegLocation rl_src = info->args[0];
1119  RegLocation rl_srcPos = info->args[1];
1120  RegLocation rl_dst = info->args[2];
1121  RegLocation rl_dstPos = info->args[3];
1122  RegLocation rl_length = info->args[4];
1123  if (rl_srcPos.is_const && (mir_graph_->ConstantValue(rl_srcPos) < 0)) {
1124    return false;
1125  }
1126  if (rl_dstPos.is_const && (mir_graph_->ConstantValue(rl_dstPos) < 0)) {
1127    return false;
1128  }
1129  ClobberCallerSave();
1130  LockCallTemps();  // Using fixed registers
1131  LoadValueDirectFixed(rl_src , rs_rAX);
1132  LoadValueDirectFixed(rl_dst , rs_rCX);
1133  LIR* src_dst_same  = OpCmpBranch(kCondEq, rs_rAX , rs_rCX, nullptr);
1134  LIR* src_null_branch = OpCmpImmBranch(kCondEq, rs_rAX , 0, nullptr);
1135  LIR* dst_null_branch = OpCmpImmBranch(kCondEq, rs_rCX , 0, nullptr);
1136  LoadValueDirectFixed(rl_length , rs_rDX);
1137  LIR* len_negative  = OpCmpImmBranch(kCondLt, rs_rDX , 0, nullptr);
1138  LIR* len_too_big  = OpCmpImmBranch(kCondGt, rs_rDX , 128, nullptr);
1139  LoadValueDirectFixed(rl_src , rs_rAX);
1140  LoadWordDisp(rs_rAX , mirror::Array::LengthOffset().Int32Value(), rs_rAX);
1141  LIR* src_bad_len  = nullptr;
1142  LIR* srcPos_negative  = nullptr;
1143  if (!rl_srcPos.is_const) {
1144    LoadValueDirectFixed(rl_srcPos , rs_rBX);
1145    srcPos_negative  = OpCmpImmBranch(kCondLt, rs_rBX , 0, nullptr);
1146    OpRegReg(kOpAdd, rs_rBX, rs_rDX);
1147    src_bad_len  = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr);
1148  } else {
1149    int pos_val = mir_graph_->ConstantValue(rl_srcPos.orig_sreg);
1150    if (pos_val == 0) {
1151      src_bad_len  = OpCmpBranch(kCondLt, rs_rAX , rs_rDX, nullptr);
1152    } else {
1153      OpRegRegImm(kOpAdd, rs_rBX,  rs_rDX, pos_val);
1154      src_bad_len  = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr);
1155    }
1156  }
1157  LIR* dstPos_negative = nullptr;
1158  LIR* dst_bad_len = nullptr;
1159  LoadValueDirectFixed(rl_dst, rs_rAX);
1160  LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX);
1161  if (!rl_dstPos.is_const) {
1162    LoadValueDirectFixed(rl_dstPos , rs_rBX);
1163    dstPos_negative = OpCmpImmBranch(kCondLt, rs_rBX , 0, nullptr);
1164    OpRegRegReg(kOpAdd, rs_rBX, rs_rBX, rs_rDX);
1165    dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr);
1166  } else {
1167    int pos_val = mir_graph_->ConstantValue(rl_dstPos.orig_sreg);
1168    if (pos_val == 0) {
1169      dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rDX, nullptr);
1170    } else {
1171      OpRegRegImm(kOpAdd, rs_rBX,  rs_rDX, pos_val);
1172      dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr);
1173    }
1174  }
1175  // everything is checked now
1176  LoadValueDirectFixed(rl_src , rs_rAX);
1177  LoadValueDirectFixed(rl_dst , rs_rBX);
1178  LoadValueDirectFixed(rl_srcPos , rs_rCX);
1179  NewLIR5(kX86Lea32RA, rs_rAX.GetReg(), rs_rAX.GetReg(),
1180       rs_rCX.GetReg() , 1, mirror::Array::DataOffset(2).Int32Value());
1181  // RAX now holds the address of the first src element to be copied
1182
1183  LoadValueDirectFixed(rl_dstPos , rs_rCX);
1184  NewLIR5(kX86Lea32RA, rs_rBX.GetReg(), rs_rBX.GetReg(),
1185       rs_rCX.GetReg() , 1, mirror::Array::DataOffset(2).Int32Value() );
1186  // RBX now holds the address of the first dst element to be copied
1187
1188  // check if the number of elements to be copied is odd or even. If odd
1189  // then copy the first element (so that the remaining number of elements
1190  // is even).
1191  LoadValueDirectFixed(rl_length , rs_rCX);
1192  OpRegImm(kOpAnd, rs_rCX, 1);
1193  LIR* jmp_to_begin_loop  = OpCmpImmBranch(kCondEq, rs_rCX, 0, nullptr);
1194  OpRegImm(kOpSub, rs_rDX, 1);
1195  LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSignedHalf);
1196  StoreBaseIndexedDisp(rs_rBX, rs_rDX, 1, 0, rs_rCX, kSignedHalf);
1197
1198  // since the remaining number of elements is even, we will copy by
1199  // two elements at a time.
1200  LIR *beginLoop = NewLIR0(kPseudoTargetLabel);
1201  LIR* jmp_to_ret  = OpCmpImmBranch(kCondEq, rs_rDX , 0, nullptr);
1202  OpRegImm(kOpSub, rs_rDX, 2);
1203  LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSingle);
1204  StoreBaseIndexedDisp(rs_rBX, rs_rDX, 1, 0, rs_rCX, kSingle);
1205  OpUnconditionalBranch(beginLoop);
1206  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
1207  LIR* launchpad_branch  = OpUnconditionalBranch(nullptr);
1208  LIR *return_point = NewLIR0(kPseudoTargetLabel);
1209  jmp_to_ret->target = return_point;
1210  jmp_to_begin_loop->target = beginLoop;
1211  src_dst_same->target = check_failed;
1212  len_negative->target = check_failed;
1213  len_too_big->target = check_failed;
1214  src_null_branch->target = check_failed;
1215  if (srcPos_negative != nullptr)
1216    srcPos_negative->target = check_failed;
1217  if (src_bad_len != nullptr)
1218    src_bad_len->target = check_failed;
1219  dst_null_branch->target = check_failed;
1220  if (dstPos_negative != nullptr)
1221    dstPos_negative->target = check_failed;
1222  if (dst_bad_len != nullptr)
1223    dst_bad_len->target = check_failed;
1224  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
1225  return true;
1226}
1227
1228
1229/*
1230 * Fast string.index_of(I) & (II).  Inline check for simple case of char <= 0xffff,
1231 * otherwise bails to standard library code.
1232 */
1233bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
1234  ClobberCallerSave();
1235  LockCallTemps();  // Using fixed registers
1236
1237  // EAX: 16 bit character being searched.
1238  // ECX: count: number of words to be searched.
1239  // EDI: String being searched.
1240  // EDX: temporary during execution.
1241  // EBX or R11: temporary during execution (depending on mode).
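  // The search itself is a single repne scasw: ECX holds the number of remaining chars, EDI the
  // current position, and EAX the char being sought; on a match the result index is recovered
  // as ((EDI - start) / 2) - 1.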
1242
1243  RegLocation rl_obj = info->args[0];
1244  RegLocation rl_char = info->args[1];
1245  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.
1246  RegStorage tmpReg = cu_->target64 ? rs_r11 : rs_rBX;
1247
1248  uint32_t char_value =
1249    rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
1250
1251  if (char_value > 0xFFFF) {
1252    // We have to punt to the real String.indexOf.
1253    return false;
1254  }
1255
1256  // Okay, we are committed to inlining this.
1257  RegLocation rl_return = GetReturn(kCoreReg);
1258  RegLocation rl_dest = InlineTarget(info);
1259
1260  // Is the string non-NULL?
1261  LoadValueDirectFixed(rl_obj, rs_rDX);
1262  GenNullCheck(rs_rDX, info->opt_flags);
1263  // uint32_t opt_flags = info->opt_flags;
1264  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
1265
1266  // Does the character fit in 16 bits?
1267  LIR* slowpath_branch = nullptr;
1268  if (rl_char.is_const) {
1269    // We need the value in EAX.
1270    LoadConstantNoClobber(rs_rAX, char_value);
1271  } else {
1272    // Character is not a constant; compare at runtime.
1273    LoadValueDirectFixed(rl_char, rs_rAX);
1274    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
1275  }
1276
1277  // From here down, we know that we are looking for a char that fits in 16 bits.
1278  // Location of reference to data array within the String object.
1279  int value_offset = mirror::String::ValueOffset().Int32Value();
1280  // Location of count within the String object.
1281  int count_offset = mirror::String::CountOffset().Int32Value();
1282  // Starting offset within data array.
1283  int offset_offset = mirror::String::OffsetOffset().Int32Value();
1284  // Start of char data within the array.
1285  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
1286
1287  // Character is in EAX.
1288  // Object pointer is in EDX.
1289
1290  // Compute the number of words to search in to rCX.
1291  Load32Disp(rs_rDX, count_offset, rs_rCX);
1292
1293  // Possible signal here due to null pointer dereference.
1294  // Note that the signal handler will expect the top word of
1295  // the stack to be the ArtMethod*.  If the PUSH edi instruction
1296  // below is ahead of the load above then this will not be true
1297  // and the signal handler will not work.
1298  MarkPossibleNullPointerException(0);
1299
1300  // We need to preserve EDI, but have no spare registers, so push it on the stack.
1301  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
1302  NewLIR1(kX86Push32R, rs_rDI.GetReg());
1303
1304  LIR *length_compare = nullptr;
1305  int start_value = 0;
1306  bool is_index_on_stack = false;
1307  if (zero_based) {
1308    // We have to handle an empty string.  Use special instruction JECXZ.
1309    length_compare = NewLIR0(kX86Jecxz8);
1310  } else {
1311    rl_start = info->args[2];
1312    // We have to offset by the start index.
1313    if (rl_start.is_const) {
1314      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
1315      start_value = std::max(start_value, 0);
1316
1317      // Is the start > count?
1318      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);
1319
1320      if (start_value != 0) {
1321        OpRegImm(kOpSub, rs_rCX, start_value);
1322      }
1323    } else {
1324      // Runtime start index.
1325      rl_start = UpdateLocTyped(rl_start, kCoreReg);
1326      if (rl_start.location == kLocPhysReg) {
1327        // Handle "start index < 0" case.
1328        OpRegReg(kOpXor, tmpReg, tmpReg);
1329        OpRegReg(kOpCmp, rl_start.reg, tmpReg);
1330        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, tmpReg);
1331
1332        // The length of the string should be greater than the start index.
1333        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
1334        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
1335        if (rl_start.reg == rs_rDI) {
1336          // The special case. We will use EDI further, so lets put start index to stack.
1337          NewLIR1(kX86Push32R, rs_rDI.GetReg());
1338          is_index_on_stack = true;
1339        }
1340      } else {
1341        // Load the start index from stack, remembering that we pushed EDI.
1342        int displacement = SRegOffset(rl_start.s_reg_low) + (cu_->target64 ? 2 : 1) * sizeof(uint32_t);
1343        {
1344          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1345          Load32Disp(rs_rX86_SP, displacement, tmpReg);
1346        }
1347        OpRegReg(kOpXor, rs_rDI, rs_rDI);
1348        OpRegReg(kOpCmp, tmpReg, rs_rDI);
1349        OpCondRegReg(kOpCmov, kCondLt, tmpReg, rs_rDI);
1350
1351        length_compare = OpCmpBranch(kCondLe, rs_rCX, tmpReg, nullptr);
1352        OpRegReg(kOpSub, rs_rCX, tmpReg);
1353        // Put the start index to stack.
1354        NewLIR1(kX86Push32R, tmpReg.GetReg());
1355        is_index_on_stack = true;
1356      }
1357    }
1358  }
1359  DCHECK(length_compare != nullptr);
1360
1361  // ECX now contains the count in words to be searched.
1362
1363  // Load the address of the string into R11 or EBX (depending on mode).
1364  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
1365  Load32Disp(rs_rDX, value_offset, rs_rDI);
1366  Load32Disp(rs_rDX, offset_offset, tmpReg);
1367  OpLea(tmpReg, rs_rDI, tmpReg, 1, data_offset);
1368
1369  // Now compute into EDI where the search will start.
1370  if (zero_based || rl_start.is_const) {
1371    if (start_value == 0) {
1372      OpRegCopy(rs_rDI, tmpReg);
1373    } else {
1374      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), tmpReg.GetReg(), 2 * start_value);
1375    }
1376  } else {
1377    if (is_index_on_stack) {
1378      // Load the start index from stack.
1379      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
1380      OpLea(rs_rDI, tmpReg, rs_rDX, 1, 0);
1381    } else {
1382      OpLea(rs_rDI, tmpReg, rl_start.reg, 1, 0);
1383    }
1384  }
1385
1386  // EDI now contains the start of the string to be searched.
1387  // We are all prepared to do the search for the character.
1388  NewLIR0(kX86RepneScasw);
1389
1390  // Did we find a match?
1391  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
1392
1393  // Yes, we matched.  Compute the index of the result.
1394  // index = ((curr_ptr - orig_ptr) / 2) - 1.
1395  OpRegReg(kOpSub, rs_rDI, tmpReg);
1396  OpRegImm(kOpAsr, rs_rDI, 1);
1397  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
1398  LIR *all_done = NewLIR1(kX86Jmp8, 0);
1399
1400  // Failed to match; return -1.
1401  LIR *not_found = NewLIR0(kPseudoTargetLabel);
1402  length_compare->target = not_found;
1403  failed_branch->target = not_found;
1404  LoadConstantNoClobber(rl_return.reg, -1);
1405
1406  // And join up at the end.
1407  all_done->target = NewLIR0(kPseudoTargetLabel);
1408  // Restore EDI from the stack.
1409  NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1410
1411  // Out of line code returns here.
1412  if (slowpath_branch != nullptr) {
1413    LIR *return_point = NewLIR0(kPseudoTargetLabel);
1414    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
1415  }
1416
1417  StoreValue(rl_dest, rl_return);
1418  return true;
1419}
1420
1421/*
1422 * @brief Enter an 'advance LOC' into the FDE buffer
1423 * @param buf FDE buffer.
1424 * @param increment Amount by which to increase the current location.
1425 */
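// These correspond to the DWARF opcodes DW_CFA_advance_loc (delta in the low six bits of the
// opcode byte), DW_CFA_advance_loc1 (0x02), DW_CFA_advance_loc2 (0x03) and DW_CFA_advance_loc4
// (0x04); e.g. an increment of 10 encodes as the single byte 0x4a, while 300 encodes as
// 0x03 0x2c 0x01.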
1426static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
1427  if (increment < 64) {
1428    // Encoding in opcode.
1429    buf.push_back(0x1 << 6 | increment);
1430  } else if (increment < 256) {
1431    // Single byte delta.
1432    buf.push_back(0x02);
1433    buf.push_back(increment);
1434  } else if (increment < 256 * 256) {
1435    // Two byte delta.
1436    buf.push_back(0x03);
1437    buf.push_back(increment & 0xff);
1438    buf.push_back((increment >> 8) & 0xff);
1439  } else {
1440    // Four byte delta.
1441    buf.push_back(0x04);
1442    PushWord(buf, increment);
1443  }
1444}
1445
1446
1447std::vector<uint8_t>* X86CFIInitialization() {
1448  return X86Mir2Lir::ReturnCommonCallFrameInformation();
1449}
1450
1451std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
1452  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
1453
1454  // Length of the CIE (except for this field).
1455  PushWord(*cfi_info, 16);
1456
1457  // CIE id.
1458  PushWord(*cfi_info, 0xFFFFFFFFU);
1459
1460  // Version: 3.
1461  cfi_info->push_back(0x03);
1462
1463  // Augmentation: empty string.
1464  cfi_info->push_back(0x0);
1465
1466  // Code alignment: 1.
1467  cfi_info->push_back(0x01);
1468
1469  // Data alignment: -4.
1470  cfi_info->push_back(0x7C);
1471
1472  // Return address register (R8).
1473  cfi_info->push_back(0x08);
1474
1475  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
1476  cfi_info->push_back(0x0C);
1477  cfi_info->push_back(0x04);
1478  cfi_info->push_back(0x04);
1479
1480  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4);.
1481  cfi_info->push_back(0x2 << 6 | 0x08);
1482  cfi_info->push_back(0x01);
1483
1484  // And 2 nops to align to a 4 byte boundary.
1485  cfi_info->push_back(0x0);
1486  cfi_info->push_back(0x0);
1487
1488  DCHECK_EQ(cfi_info->size() & 3, 0U);
1489  return cfi_info;
1490}
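// The CIE built above is 20 bytes in total (the leading length field of 16 excludes itself),
// assuming PushWord emits the 32-bit value little-endian as the FDE length patch below implies:
//   10 00 00 00  ff ff ff ff  03 00 01 7c 08  0c 04 04  88 01  00 00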
1491
1492static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
1493  uint8_t buffer[12];
1494  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
1495  for (uint8_t *p = buffer; p < ptr; p++) {
1496    buf.push_back(*p);
1497  }
1498}
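// ULEB128 examples: a frame size of 64 encodes as the single byte 0x40, while 300 encodes
// as 0xac 0x02 (low 7 bits first, high bit set on every byte except the last).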
1499
1500std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
1501  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
1502
1503  // Generate the FDE for the method.
1504  DCHECK_NE(data_offset_, 0U);
1505
1506  // Length (will be filled in later in this routine).
1507  PushWord(*cfi_info, 0);
1508
1509  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
1510  // one CIE for the whole debug_frame section.
1511  PushWord(*cfi_info, 0);
1512
1513  // 'initial_location' (filled in by linker).
1514  PushWord(*cfi_info, 0);
1515
1516  // 'address_range' (number of bytes in the method).
1517  PushWord(*cfi_info, data_offset_);
1518
1519  // The instructions in the FDE.
1520  if (stack_decrement_ != nullptr) {
1521    // Advance LOC to just past the stack decrement.
1522    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
1523    AdvanceLoc(*cfi_info, pc);
1524
1525    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
1526    cfi_info->push_back(0x0e);
1527    EncodeUnsignedLeb128(*cfi_info, frame_size_);
1528
1529    // We continue with that stack until the epilogue.
1530    if (stack_increment_ != nullptr) {
1531      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
1532      AdvanceLoc(*cfi_info, new_pc - pc);
1533
1534      // We probably have code snippets after the epilogue, so save the
1535      // current state: DW_CFA_remember_state.
1536      cfi_info->push_back(0x0a);
1537
1538      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
1539      // PC on the stack now.
1540      cfi_info->push_back(0x0e);
1541      EncodeUnsignedLeb128(*cfi_info, 4);
1542
1543      // Everything after that is the same as before the epilogue.
1544      // Stack bump was followed by RET instruction.
1545      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
1546      if (post_ret_insn != nullptr) {
1547        pc = new_pc;
1548        new_pc = post_ret_insn->offset;
1549        AdvanceLoc(*cfi_info, new_pc - pc);
1550        // Restore the state: DW_CFA_restore_state.
1551        cfi_info->push_back(0x0b);
1552      }
1553    }
1554  }
1555
1556  // Padding to a multiple of 4
1557  while ((cfi_info->size() & 3) != 0) {
1558    // DW_CFA_nop is encoded as 0.
1559    cfi_info->push_back(0);
1560  }
1561
1562  // Set the length of the FDE inside the generated bytes.
1563  uint32_t length = cfi_info->size() - 4;
1564  (*cfi_info)[0] = length;
1565  (*cfi_info)[1] = length >> 8;
1566  (*cfi_info)[2] = length >> 16;
1567  (*cfi_info)[3] = length >> 24;
1568  return cfi_info;
1569}
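// As a sketch: for a method whose stack decrement ends at code offset 4 with a 64-byte frame,
// the FDE instructions above start with 0x44 (advance_loc 4) followed by 0x0e 0x40
// (DW_CFA_def_cfa_offset 64); at the epilogue they emit remember_state, def_cfa_offset 4 and,
// if code follows the ret, another advance plus restore_state.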
1570
1571void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
1572  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
1573    case kMirOpReserveVectorRegisters:
1574      ReserveVectorRegisters(mir);
1575      break;
1576    case kMirOpReturnVectorRegisters:
1577      ReturnVectorRegisters();
1578      break;
1579    case kMirOpConstVector:
1580      GenConst128(bb, mir);
1581      break;
1582    case kMirOpMoveVector:
1583      GenMoveVector(bb, mir);
1584      break;
1585    case kMirOpPackedMultiply:
1586      GenMultiplyVector(bb, mir);
1587      break;
1588    case kMirOpPackedAddition:
1589      GenAddVector(bb, mir);
1590      break;
1591    case kMirOpPackedSubtract:
1592      GenSubtractVector(bb, mir);
1593      break;
1594    case kMirOpPackedShiftLeft:
1595      GenShiftLeftVector(bb, mir);
1596      break;
1597    case kMirOpPackedSignedShiftRight:
1598      GenSignedShiftRightVector(bb, mir);
1599      break;
1600    case kMirOpPackedUnsignedShiftRight:
1601      GenUnsignedShiftRightVector(bb, mir);
1602      break;
1603    case kMirOpPackedAnd:
1604      GenAndVector(bb, mir);
1605      break;
1606    case kMirOpPackedOr:
1607      GenOrVector(bb, mir);
1608      break;
1609    case kMirOpPackedXor:
1610      GenXorVector(bb, mir);
1611      break;
1612    case kMirOpPackedAddReduce:
1613      GenAddReduceVector(bb, mir);
1614      break;
1615    case kMirOpPackedReduce:
1616      GenReduceVector(bb, mir);
1617      break;
1618    case kMirOpPackedSet:
1619      GenSetVector(bb, mir);
1620      break;
1621    default:
1622      break;
1623  }
1624}
1625
1626void X86Mir2Lir::ReserveVectorRegisters(MIR* mir) {
1627  // We should not try to reserve twice without returning the registers.
1628  DCHECK_EQ(num_reserved_vector_regs_, -1);
1629
1630  int num_vector_reg = mir->dalvikInsn.vA;
1631  for (int i = 0; i < num_vector_reg; i++) {
1632    RegStorage xp_reg = RegStorage::Solo128(i);
1633    RegisterInfo *xp_reg_info = GetRegInfo(xp_reg);
1634    Clobber(xp_reg);
1635
1636    for (RegisterInfo *info = xp_reg_info->GetAliasChain();
1637                       info != nullptr;
1638                       info = info->GetAliasChain()) {
1639      if (info->GetReg().IsSingle()) {
1640        reg_pool_->sp_regs_.Delete(info);
1641      } else {
1642        reg_pool_->dp_regs_.Delete(info);
1643      }
1644    }
1645  }
1646
1647  num_reserved_vector_regs_ = num_vector_reg;
1648}
1649
1650void X86Mir2Lir::ReturnVectorRegisters() {
1651  // Return all the reserved registers
1652  for (int i = 0; i < num_reserved_vector_regs_; i++) {
1653    RegStorage xp_reg = RegStorage::Solo128(i);
1654    RegisterInfo *xp_reg_info = GetRegInfo(xp_reg);
1655
1656    for (RegisterInfo *info = xp_reg_info->GetAliasChain();
1657                       info != nullptr;
1658                       info = info->GetAliasChain()) {
1659      if (info->GetReg().IsSingle()) {
1660        reg_pool_->sp_regs_.Insert(info);
1661      } else {
1662        reg_pool_->dp_regs_.Insert(info);
1663      }
1664    }
1665  }
1666
1667  // We don't have any more reserved vector registers.
1668  num_reserved_vector_regs_ = -1;
1669}
1670
1671void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
1672  store_method_addr_used_ = true;
1673  int type_size = mir->dalvikInsn.vB;
1674  // We only support 128 bit vectors.
1675  DCHECK_EQ(type_size & 0xFFFF, 128);
1676  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
1677  uint32_t *args = mir->dalvikInsn.arg;
1678  int reg = rs_dest.GetReg();
1679  // Check for all 0 case.
1680  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
1681    NewLIR2(kX86XorpsRR, reg, reg);
1682    return;
1683  }
1684
1685  // Append the mov const vector to reg opcode.
1686  AppendOpcodeWithConst(kX86MovupsRM, reg, mir);
1687}
1688
1689void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
1690  // Okay, load it from the constant vector area.
1691  LIR *data_target = ScanVectorLiteral(mir);
1692  if (data_target == nullptr) {
1693    data_target = AddVectorLiteral(mir);
1694  }
1695
1696  // Address the start of the method.
1697  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
1698  if (rl_method.wide) {
1699    rl_method = LoadValueWide(rl_method, kCoreReg);
1700  } else {
1701    rl_method = LoadValue(rl_method, kCoreReg);
1702  }
1703
1704  // Load the proper value from the literal area.
1705  // We don't know the proper offset for the value, so pick one that will force
1706  // 4 byte offset.  We will fix this up in the assembler later to have the right
1707  // value.
1708  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1709  LIR *load = NewLIR2(opcode, reg, rl_method.reg.GetReg());
1710  load->flags.fixup = kFixupLoad;
1711  load->target = data_target;
1712}
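// E.g. a kMirOpConstVector holding {1, 2, 3, 4} becomes a 16-byte entry in the literal pool
// and is loaded here with a movups/pand addressed relative to the stored method address; the
// real displacement is patched in later by the assembler via the kFixupLoad fixup.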
1713
1714void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
1715  // We only support 128 bit registers.
1716  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1717  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
1718  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
1719  NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg());
1720}
1721
1722void X86Mir2Lir::GenMultiplyVectorSignedByte(BasicBlock *bb, MIR *mir) {
1723  const int BYTE_SIZE = 8;
1724  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1725  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
1726  RegStorage rs_src1_high_tmp = Get128BitRegister(AllocTempWide());
1727
1728  /*
1729   * Emulate kSignedByte multiplication by splitting the 16 byte lanes of the two XMM
1730   * registers and multiplying 8 at a time before recombining back into one XMM register.
1731   *
1732   *   let xmm1, xmm2 be real srcs (keep low bits of 16bit lanes)
1733   *       xmm3 is tmp             (operate on high bits of 16bit lanes)
1734   *
1735   *    xmm3 = xmm1
1736   *    xmm1 = xmm1 .* xmm2
1737   *    xmm1 = xmm1 & 0x00ff00ff00ff00ff00ff00ff00ff00ff  // xmm1 now has low bits
1738   *    xmm3 = xmm3 .>> 8
1739   *    xmm2 = xmm2 & 0xff00ff00ff00ff00ff00ff00ff00ff00
1740   *    xmm2 = xmm2 .* xmm3                               // xmm2 now has high bits
1741   *    xmm1 = xmm1 | xmm2                                // combine results
1742   */
1743
1744  // Copy xmm1.
1745  NewLIR2(kX86Mova128RR, rs_src1_high_tmp.GetReg(), rs_dest_src1.GetReg());
1746
1747  // Multiply low bits.
1748  NewLIR2(kX86PmullwRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
1749
1750  // xmm1 now has low bits.
1751  AndMaskVectorRegister(rs_dest_src1, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF);
1752
1753  // Prepare high bits for multiplication.
1754  NewLIR2(kX86PsrlwRI, rs_src1_high_tmp.GetReg(), BYTE_SIZE);
1755  AndMaskVectorRegister(rs_src2, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00);
1756
1757  // Multiply high bits and xmm2 now has high bits.
1758  NewLIR2(kX86PmullwRR, rs_src2.GetReg(), rs_src1_high_tmp.GetReg());
1759
1760  // Combine back into dest XMM register.
1761  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
1762}
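// For instance, if a 16-bit lane of xmm1 holds 0x0012 and the matching lane of xmm2 holds
// 0x0034, pmullw produces 0x03a8 and the 0x00ff mask keeps 0xa8 -- the truncated byte product
// of 0x12 * 0x34; the shift/mask path repeats this for the high byte of each 16-bit lane.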
1763
1764void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
1765  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1766  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
1767  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1768  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
1769  int opcode = 0;
1770  switch (opsize) {
1771    case k32:
1772      opcode = kX86PmulldRR;
1773      break;
1774    case kSignedHalf:
1775      opcode = kX86PmullwRR;
1776      break;
1777    case kSingle:
1778      opcode = kX86MulpsRR;
1779      break;
1780    case kDouble:
1781      opcode = kX86MulpdRR;
1782      break;
1783    case kSignedByte:
1784      // HW doesn't support 16x16 byte multiplication so emulate it.
1785      GenMultiplyVectorSignedByte(bb, mir);
1786      return;
1787    default:
1788      LOG(FATAL) << "Unsupported vector multiply " << opsize;
1789      break;
1790  }
1791  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
1792}
1793
1794void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) {
1795  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1796  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
1797  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1798  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
1799  int opcode = 0;
1800  switch (opsize) {
1801    case k32:
1802      opcode = kX86PadddRR;
1803      break;
1804    case kSignedHalf:
1805    case kUnsignedHalf:
1806      opcode = kX86PaddwRR;
1807      break;
1808    case kUnsignedByte:
1809    case kSignedByte:
1810      opcode = kX86PaddbRR;
1811      break;
1812    case kSingle:
1813      opcode = kX86AddpsRR;
1814      break;
1815    case kDouble:
1816      opcode = kX86AddpdRR;
1817      break;
1818    default:
1819      LOG(FATAL) << "Unsupported vector addition " << opsize;
1820      break;
1821  }
1822  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
1823}
1824
1825void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) {
1826  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1827  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
1828  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1829  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
1830  int opcode = 0;
1831  switch (opsize) {
1832    case k32:
1833      opcode = kX86PsubdRR;
1834      break;
1835    case kSignedHalf:
1836    case kUnsignedHalf:
1837      opcode = kX86PsubwRR;
1838      break;
1839    case kUnsignedByte:
1840    case kSignedByte:
1841      opcode = kX86PsubbRR;
1842      break;
1843    case kSingle:
1844      opcode = kX86SubpsRR;
1845      break;
1846    case kDouble:
1847      opcode = kX86SubpdRR;
1848      break;
1849    default:
1850      LOG(FATAL) << "Unsupported vector subtraction " << opsize;
1851      break;
1852  }
1853  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
1854}
1855
1856void X86Mir2Lir::GenShiftByteVector(BasicBlock *bb, MIR *mir) {
1857  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1858  RegStorage rs_tmp = Get128BitRegister(AllocTempWide());
1859
1860  int opcode = 0;
1861  int imm = mir->dalvikInsn.vB;
1862
1863  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
1864    case kMirOpPackedShiftLeft:
1865      opcode = kX86PsllwRI;
1866      break;
1867    case kMirOpPackedSignedShiftRight:
1868      opcode = kX86PsrawRI;
1869      break;
1870    case kMirOpPackedUnsignedShiftRight:
1871      opcode = kX86PsrlwRI;
1872      break;
1873    default:
1874      LOG(FATAL) << "Unsupported shift operation on byte vector " << opcode;
1875      break;
1876  }
1877
1878  /*
1879   * xmm1 will have low bits
1880   * xmm2 will have high bits
1881   *
1882   * xmm2 = xmm1
1883   * xmm1 = xmm1 .<< N
1884   * xmm2 = xmm2 && 0xFF00FF00FF00FF00FF00FF00FF00FF00
1885   * xmm2 = xmm2 .<< N
1886   * xmm1 = xmm1 | xmm2
1887   */
1888
1889  // Copy xmm1.
1890  NewLIR2(kX86Mova128RR, rs_tmp.GetReg(), rs_dest_src1.GetReg());
1891
1892  // Shift lower values.
1893  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1894
1895  // Mask bottom bits.
1896  AndMaskVectorRegister(rs_tmp, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00);
1897
1898  // Shift higher values.
1899  NewLIR2(opcode, rs_tmp.GetReg(), imm);
1900
1901  // Combine back into dest XMM register.
1902  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_tmp.GetReg());
1903}
1904
1905void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
1906  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1907  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
1908  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1909  int imm = mir->dalvikInsn.vB;
1910  int opcode = 0;
1911  switch (opsize) {
1912    case k32:
1913      opcode = kX86PslldRI;
1914      break;
1915    case k64:
1916      opcode = kX86PsllqRI;
1917      break;
1918    case kSignedHalf:
1919    case kUnsignedHalf:
1920      opcode = kX86PsllwRI;
1921      break;
1922    case kSignedByte:
1923    case kUnsignedByte:
1924      GenShiftByteVector(bb, mir);
1925      return;
1926    default:
1927      LOG(FATAL) << "Unsupported vector shift left " << opsize;
1928      break;
1929  }
1930  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1931}
1932
1933void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
1934  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1935  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
1936  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1937  int imm = mir->dalvikInsn.vB;
1938  int opcode = 0;
1939  switch (opsize) {
1940    case k32:
1941      opcode = kX86PsradRI;
1942      break;
1943    case kSignedHalf:
1944    case kUnsignedHalf:
1945      opcode = kX86PsrawRI;
1946      break;
1947    case kSignedByte:
1948    case kUnsignedByte:
1949      GenShiftByteVector(bb, mir);
1950      return;
1951    default:
1952      LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
1953      break;
1954  }
1955  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1956}
1957
1958void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
1959  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1960  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
1961  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1962  int imm = mir->dalvikInsn.vB;
1963  int opcode = 0;
1964  switch (opsize) {
1965    case k32:
1966      opcode = kX86PsrldRI;
1967      break;
1968    case k64:
1969      opcode = kX86PsrlqRI;
1970      break;
1971    case kSignedHalf:
1972    case kUnsignedHalf:
1973      opcode = kX86PsrlwRI;
1974      break;
1975    case kSignedByte:
1976    case kUnsignedByte:
1977      GenShiftByteVector(bb, mir);
1978      return;
1979    default:
1980      LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
1981      break;
1982  }
1983  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
1984}
1985
1986void X86Mir2Lir::GenAndVector(BasicBlock *bb, MIR *mir) {
1987  // We only support 128 bit registers.
1988  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1989  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1990  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
1991  NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
1992}
1993
1994void X86Mir2Lir::GenOrVector(BasicBlock *bb, MIR *mir) {
1995  // We only support 128 bit registers.
1996  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
1997  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
1998  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
1999  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
2000}
2001
2002void X86Mir2Lir::GenXorVector(BasicBlock *bb, MIR *mir) {
2003  // We only support 128 bit registers.
2004  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
2005  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
2006  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
2007  NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
2008}
2009
2010void X86Mir2Lir::AndMaskVectorRegister(RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3, uint32_t m4) {
2011  MaskVectorRegister(kX86PandRM, rs_src1, m1, m2, m3, m4);
2012}
2013
2014void X86Mir2Lir::MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m0, uint32_t m1, uint32_t m2, uint32_t m3) {
2015  // Create temporary MIR as container for 128-bit binary mask.
2016  MIR const_mir;
2017  MIR* const_mirp = &const_mir;
2018  const_mirp->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpConstVector);
2019  const_mirp->dalvikInsn.arg[0] = m0;
2020  const_mirp->dalvikInsn.arg[1] = m1;
2021  const_mirp->dalvikInsn.arg[2] = m2;
2022  const_mirp->dalvikInsn.arg[3] = m3;
2023
2024  // Mask vector with const from literal pool.
2025  AppendOpcodeWithConst(opcode, rs_src1.GetReg(), const_mirp);
2026}
2027
2028void X86Mir2Lir::GenAddReduceVector(BasicBlock *bb, MIR *mir) {
2029  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
2030  RegStorage rs_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
2031  RegLocation rl_dest = mir_graph_->GetDest(mir);
2032  RegStorage rs_tmp;
2033
2034  int vec_bytes = (mir->dalvikInsn.vC & 0xFFFF) / 8;
2035  int vec_unit_size = 0;
2036  int opcode = 0;
2037  int extr_opcode = 0;
2038  RegLocation rl_result;
2039
2040  switch (opsize) {
2041    case k32:
2042      extr_opcode = kX86PextrdRRI;
2043      opcode = kX86PhadddRR;
2044      vec_unit_size = 4;
2045      break;
2046    case kSignedByte:
2047    case kUnsignedByte:
2048      extr_opcode = kX86PextrbRRI;
2049      opcode = kX86PhaddwRR;
2050      vec_unit_size = 2;
2051      break;
2052    case kSignedHalf:
2053    case kUnsignedHalf:
2054      extr_opcode = kX86PextrwRRI;
2055      opcode = kX86PhaddwRR;
2056      vec_unit_size = 2;
2057      break;
2058    case kSingle:
2059      rl_result = EvalLoc(rl_dest, kFPReg, true);
2060      vec_unit_size = 4;
2061      for (int i = 0; i < 3; i++) {
2062        NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), rs_src1.GetReg());
2063        NewLIR3(kX86ShufpsRRI, rs_src1.GetReg(), rs_src1.GetReg(), 0x39);
2064      }
2065      NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), rs_src1.GetReg());
2066      StoreValue(rl_dest, rl_result);
2067
2068      // For single-precision floats, we are done here
2069      return;
2070    default:
2071      LOG(FATAL) << "Unsupported vector add reduce " << opsize;
2072      break;
2073  }
2074
2075  int elems = vec_bytes / vec_unit_size;
2076
2077  // Emulate a byte-wise horizontal add by splitting into two vectors of 8 16-bit values,
2078  // reducing each, and then adding the partial results.  TODO: is overflow handled correctly?
2079  if (opsize == kSignedByte || opsize == kUnsignedByte) {
2080    rs_tmp = Get128BitRegister(AllocTempWide());
2081
2082    // tmp = xmm1 .>> 8.
2083    NewLIR2(kX86Mova128RR, rs_tmp.GetReg(), rs_src1.GetReg());
2084    NewLIR2(kX86PsrlwRI, rs_tmp.GetReg(), 8);
2085
2086    // Zero extend low bits in xmm1.
2087    AndMaskVectorRegister(rs_src1, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF);
2088  }
2089
2090  while (elems > 1) {
2091    if (opsize == kSignedByte || opsize == kUnsignedByte) {
2092      NewLIR2(opcode, rs_tmp.GetReg(), rs_tmp.GetReg());
2093    }
2094    NewLIR2(opcode, rs_src1.GetReg(), rs_src1.GetReg());
2095    elems >>= 1;
2096  }
2097
2098  // Combine the results if we separated them.
2099  if (opsize == kSignedByte || opsize == kUnsignedByte) {
2100    NewLIR2(kX86PaddbRR, rs_src1.GetReg(), rs_tmp.GetReg());
2101  }
2102
2103  // We need to extract to a GPR.
2104  RegStorage temp = AllocTemp();
2105  NewLIR3(extr_opcode, temp.GetReg(), rs_src1.GetReg(), 0);
2106
2107  // Can we do this directly into memory?
2108  rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2109  if (rl_result.location == kLocPhysReg) {
2110    // Ensure res is in a core reg
2111    rl_result = EvalLoc(rl_dest, kCoreReg, true);
2112    OpRegReg(kOpAdd, rl_result.reg, temp);
2113    StoreFinalValue(rl_dest, rl_result);
2114  } else {
2115    OpMemReg(kOpAdd, rl_result, temp.GetReg());
2116  }
2117
2118  FreeTemp(temp);
2119}
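// As an illustration, for a k32 vector holding {1, 2, 3, 4} the two phaddd passes above produce
// {3, 7, 3, 7} and then {10, 10, 10, 10}; pextrd of lane 0 gives 10, which is then added into
// the destination VR.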
2120
2121void X86Mir2Lir::GenReduceVector(BasicBlock *bb, MIR *mir) {
2122  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
2123  RegLocation rl_dest = mir_graph_->GetDest(mir);
2124  RegStorage rs_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
2125  int extract_index = mir->dalvikInsn.arg[0];
2126  int extr_opcode = 0;
2127  RegLocation rl_result;
2128  bool is_wide = false;
2129
2130  switch (opsize) {
2131    case k32:
2132      rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2133      extr_opcode = (rl_result.location == kLocPhysReg) ? kX86PextrdMRI : kX86PextrdRRI;
2134      break;
2135    case kSignedHalf:
2136    case kUnsignedHalf:
2137      rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2138      extr_opcode = (rl_result.location == kLocPhysReg) ? kX86PextrwMRI : kX86PextrwRRI;
2139      break;
2140    default:
2141      LOG(FATAL) << "Unsupported vector add reduce " << opsize;
2142      return;
2143      break;
2144  }
2145
2146  if (rl_result.location == kLocPhysReg) {
2147    NewLIR3(extr_opcode, rl_result.reg.GetReg(), rs_src1.GetReg(), extract_index);
2148    if (is_wide) {
2149      StoreFinalValueWide(rl_dest, rl_result);
2150    } else {
2151      StoreFinalValue(rl_dest, rl_result);
2152    }
2153  } else {
2154    int displacement = SRegOffset(rl_result.s_reg_low);
2155    LIR *l = NewLIR3(extr_opcode, rs_rX86_SP.GetReg(), displacement, rs_src1.GetReg());
2156    AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is_wide /* is_64bit */);
2157    AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is_wide /* is_64bit */);
2158  }
2159}
2160
2161void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
2162  DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
2163  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
2164  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
2165  int op_low = 0, op_high = 0, imm = 0, op_mov = kX86MovdxrRR;
2166  RegisterClass reg_type = kCoreReg;
2167
2168  switch (opsize) {
2169    case k32:
2170      op_low = kX86PshufdRRI;
2171      break;
2172    case kSingle:
2173      op_low = kX86PshufdRRI;
2174      op_mov = kX86Mova128RR;
2175      reg_type = kFPReg;
2176      break;
2177    case k64:
2178      op_low = kX86PshufdRRI;
2179      imm = 0x44;
2180      break;
2181    case kDouble:
2182      op_low = kX86PshufdRRI;
2183      op_mov = kX86Mova128RR;
2184      reg_type = kFPReg;
2185      imm = 0x44;
2186      break;
2187    case kSignedByte:
2188    case kUnsignedByte:
2189      // Shuffle 8 bit value into 16 bit word.
2190      // We set val = val + (val << 8) below and use 16 bit shuffle.
2191    case kSignedHalf:
2192    case kUnsignedHalf:
2193      // Handles low quadword.
2194      op_low = kX86PshuflwRRI;
2195      // Handles upper quadword.
2196      op_high = kX86PshufdRRI;
2197      break;
2198    default:
2199      LOG(FATAL) << "Unsupported vector set " << opsize;
2200      break;
2201  }
2202
2203  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
2204
2205  // Load the value from the VR into the reg.
2206  if (rl_src.wide == 0) {
2207    rl_src = LoadValue(rl_src, reg_type);
2208  } else {
2209    rl_src = LoadValueWide(rl_src, reg_type);
2210  }
2211
2212  // If opsize is 8 bits wide then double value and use 16 bit shuffle instead.
2213  if (opsize == kSignedByte || opsize == kUnsignedByte) {
2214    RegStorage temp = AllocTemp();
2215    // val = val + (val << 8).
2216    NewLIR2(kX86Mov32RR, temp.GetReg(), rl_src.reg.GetReg());
2217    NewLIR2(kX86Sal32RI, temp.GetReg(), 8);
2218    NewLIR2(kX86Or32RR, rl_src.reg.GetReg(), temp.GetReg());
2219    FreeTemp(temp);
2220  }
2221
2222  // Load the value into the XMM register.
2223  NewLIR2(op_mov, rs_dest.GetReg(), rl_src.reg.GetReg());
2224
2225  // Now shuffle the value across the destination.
2226  NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), imm);
2227
2228  // And then repeat as needed.
2229  if (op_high != 0) {
2230    NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), imm);
2231  }
2232}
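// E.g. broadcasting the half-word 0x1234: movd copies it into the low 32 bits of the XMM
// register, pshuflw with imm 0 replicates it across the low four 16-bit lanes, and pshufd
// with imm 0 then copies that dword across the whole register, so every lane reads 0x1234.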
2233
2234LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
2235  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
2236  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
2237    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
2238        args[2] == p->operands[2] && args[3] == p->operands[3]) {
2239      return p;
2240    }
2241  }
2242  return nullptr;
2243}
2244
2245LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
2246  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
2247  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
2248  new_value->operands[0] = args[0];
2249  new_value->operands[1] = args[1];
2250  new_value->operands[2] = args[2];
2251  new_value->operands[3] = args[3];
2252  new_value->next = const_vectors_;
2253  if (const_vectors_ == nullptr) {
2254    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
2255  }
2256  estimated_native_code_size_ += 16;  // Space for one vector.
2257  const_vectors_ = new_value;
2258  return new_value;
2259}
2260
2261// ------------ ABI support: mapping of args to physical registers -------------
2262RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) {
2263  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3, kArg4, kArg5};
2264  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(SpecialTargetRegister);
2265  const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3,
2266                                                  kFArg4, kFArg5, kFArg6, kFArg7};
2267  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(SpecialTargetRegister);
2268
2269  if (is_double_or_float) {
2270    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
2271      return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide);
2272    }
2273  } else {
2274    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
2275      return is_ref ? ml_->TargetRefReg(coreArgMappingToPhysicalReg[cur_core_reg_++]) :
2276                      ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], is_wide);
2277    }
2278  }
2279  return RegStorage::InvalidReg();
2280}
2281
2282RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
2283  DCHECK(IsInitialized());
2284  auto res = mapping_.find(in_position);
2285  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
2286}
2287
2288void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) {
2289  DCHECK(mapper != nullptr);
2290  max_mapped_in_ = -1;
2291  is_there_stack_mapped_ = false;
2292  for (int in_position = 0; in_position < count; in_position++) {
2293     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
2294             arg_locs[in_position].wide, arg_locs[in_position].ref);
2295     if (reg.Valid()) {
2296       mapping_[in_position] = reg;
2297       max_mapped_in_ = std::max(max_mapped_in_, in_position);
2298       if (arg_locs[in_position].wide) {
2299         // We covered 2 args, so skip the next one
2300         in_position++;
2301       }
2302     } else {
2303       is_there_stack_mapped_ = true;
2304     }
2305  }
2306  initialized_ = true;
2307}
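// Under this mapping a signature such as (long, int, double) comes in as: vreg pair 0/1 ->
// kArg1 (wide), vreg 2 -> kArg2, vregs 3/4 -> kFArg0; anything past the fifth core or eighth
// FP argument register falls back to the stack (is_there_stack_mapped_).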
2308
2309RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
2310  if (!cu_->target64) {
2311    return GetCoreArgMappingToPhysicalReg(arg_num);
2312  }
2313
2314  if (!in_to_reg_storage_mapping_.IsInitialized()) {
2315    int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
2316    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
2317
2318    InToRegStorageX86_64Mapper mapper(this);
2319    in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
2320  }
2321  return in_to_reg_storage_mapping_.Get(arg_num);
2322}
2323
2324RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) {
2325  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
2326  // Not used for 64-bit.  TODO: Move X86_32 to the same framework.
2327  switch (core_arg_num) {
2328    case 0:
2329      return rs_rX86_ARG1;
2330    case 1:
2331      return rs_rX86_ARG2;
2332    case 2:
2333      return rs_rX86_ARG3;
2334    default:
2335      return RegStorage::InvalidReg();
2336  }
2337}
2338
2339// ---------End of ABI support: mapping of args to physical registers -------------
2340
2341/*
2342 * If there are any ins passed in registers that have not been promoted
2343 * to a callee-save register, flush them to the frame.  Perform initial
2344 * assignment of promoted arguments.
2345 *
2346 * ArgLocs is an array of location records describing the incoming arguments
2347 * with one location record per word of argument.
2348 */
2349void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
2350  if (!cu_->target64) return Mir2Lir::FlushIns(ArgLocs, rl_method);
2351  /*
2352   * Dummy up a RegLocation for the incoming Method*
2353   * It will attempt to keep kArg0 live (or copy it to home location
2354   * if promoted).
2355   */
2356
2357  RegLocation rl_src = rl_method;
2358  rl_src.location = kLocPhysReg;
2359  rl_src.reg = TargetRefReg(kArg0);
2360  rl_src.home = false;
2361  MarkLive(rl_src);
2362  StoreValue(rl_method, rl_src);
2363  // If Method* has been promoted, explicitly flush
2364  if (rl_method.location == kLocPhysReg) {
2365    StoreRefDisp(rs_rX86_SP, 0, As32BitReg(TargetRefReg(kArg0)), kNotVolatile);
2366  }
2367
2368  if (cu_->num_ins == 0) {
2369    return;
2370  }
2371
2372  int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
2373  /*
2374   * Copy incoming arguments to their proper home locations.
2375   * NOTE: an older version of dx had an issue in which
2376   * it would reuse static method argument registers.
2377   * This could result in the same Dalvik virtual register
2378   * being promoted to both core and fp regs. To account for this,
2379   * we only copy to the corresponding promoted physical register
2380   * if it matches the type of the SSA name for the incoming
2381   * argument.  It is also possible that long and double arguments
2382   * end up half-promoted.  In those cases, we must flush the promoted
2383   * half to memory as well.
2384   */
2385  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2386  for (int i = 0; i < cu_->num_ins; i++) {
2387    // get reg corresponding to input
2388    RegStorage reg = GetArgMappingToPhysicalReg(i);
2389
2390    RegLocation* t_loc = &ArgLocs[i];
2391    if (reg.Valid()) {
2392      // If arriving in register.
2393
2394      // We have already updated the arg location with promoted info
2395      // so we can be based on it.
2396      if (t_loc->location == kLocPhysReg) {
2397        // Just copy it.
2398        OpRegCopy(t_loc->reg, reg);
2399      } else {
2400        // Needs flush.
2401        if (t_loc->ref) {
2402          StoreRefDisp(rs_rX86_SP, SRegOffset(start_vreg + i), reg, kNotVolatile);
2403        } else {
2404          StoreBaseDisp(rs_rX86_SP, SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
2405                        kNotVolatile);
2406        }
2407      }
2408    } else {
2409      // If arriving in frame & promoted.
2410      if (t_loc->location == kLocPhysReg) {
2411        if (t_loc->ref) {
2412          LoadRefDisp(rs_rX86_SP, SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
2413        } else {
2414          LoadBaseDisp(rs_rX86_SP, SRegOffset(start_vreg + i), t_loc->reg,
2415                       t_loc->wide ? k64 : k32, kNotVolatile);
2416        }
2417      }
2418    }
2419    if (t_loc->wide) {
2420      // Increment i to skip the next one.
2421      i++;
2422    }
2423  }
2424}
2425
2426/*
2427 * Load up to 5 arguments, the first three of which will be in
2428 * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
2429 * and as part of the load sequence, it must be replaced with
2430 * the target method pointer.  Note, this may also be called
2431 * for "range" variants if the number of arguments is 5 or fewer.
2432 */
2433int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
2434                                  int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
2435                                  const MethodReference& target_method,
2436                                  uint32_t vtable_idx, uintptr_t direct_code,
2437                                  uintptr_t direct_method, InvokeType type, bool skip_this) {
2438  if (!cu_->target64) {
2439    return Mir2Lir::GenDalvikArgsNoRange(info,
2440                                  call_state, pcrLabel, next_call_insn,
2441                                  target_method,
2442                                  vtable_idx, direct_code,
2443                                  direct_method, type, skip_this);
2444  }
2445  return GenDalvikArgsRange(info,
2446                       call_state, pcrLabel, next_call_insn,
2447                       target_method,
2448                       vtable_idx, direct_code,
2449                       direct_method, type, skip_this);
2450}
2451
2452/*
2453 * May have 0+ arguments (also used for jumbo).  Note that
2454 * source virtual registers may be in physical registers, so may
2455 * need to be flushed to home location before copying.  This
2456 * applies to arg3 and above (see below).
2457 *
2458 * Two general strategies:
2459 *    If < 20 arguments
2460 *       Pass args 3-18 using vldm/vstm block copy
2461 *       Pass arg0, arg1 & arg2 in kArg1-kArg3
2462 *    If 20+ arguments
2463 *       Pass args arg19+ using memcpy block copy
2464 *       Pass arg0, arg1 & arg2 in kArg1-kArg3
2465 *
2466 */
2467int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
2468                                LIR** pcrLabel, NextCallInsn next_call_insn,
2469                                const MethodReference& target_method,
2470                                uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
2471                                InvokeType type, bool skip_this) {
2472  if (!cu_->target64) {
2473    return Mir2Lir::GenDalvikArgsRange(info, call_state,
2474                                pcrLabel, next_call_insn,
2475                                target_method,
2476                                vtable_idx, direct_code, direct_method,
2477                                type, skip_this);
2478  }
2479
2480  /* If no arguments, just return */
2481  if (info->num_arg_words == 0)
2482    return call_state;
2483
2484  const int start_index = skip_this ? 1 : 0;
2485
2486  InToRegStorageX86_64Mapper mapper(this);
2487  InToRegStorageMapping in_to_reg_storage_mapping;
2488  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
2489  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
2490  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
2491          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
2492  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
2493
2494  // First of all, check whether it makes sense to use bulk copying.
2495  // The optimization is applicable only for the range case.
2496  // TODO: make a constant instead of 2
2497  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
2498    // Scan the rest of the args - if in phys_reg flush to memory
2499    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
2500      RegLocation loc = info->args[next_arg];
2501      if (loc.wide) {
2502        loc = UpdateLocWide(loc);
2503        if (loc.location == kLocPhysReg) {
2504          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2505          StoreBaseDisp(rs_rX86_SP, SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
2506        }
2507        next_arg += 2;
2508      } else {
2509        loc = UpdateLoc(loc);
2510        if (loc.location == kLocPhysReg) {
2511          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2512          StoreBaseDisp(rs_rX86_SP, SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
2513        }
2514        next_arg++;
2515      }
2516    }
2517
2518    // Logic below assumes that Method pointer is at offset zero from SP.
2519    DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
2520
2521    // The rest can be copied together
2522    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
2523    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set);
2524
2525    int current_src_offset = start_offset;
2526    int current_dest_offset = outs_offset;
2527
2528    // Only dalvik regs are accessed in this loop; no next_call_insn() calls.
2529    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2530    while (regs_left_to_pass_via_stack > 0) {
2531      // This is based on the knowledge that the stack itself is 16-byte aligned.
2532      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
2533      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
2534      size_t bytes_to_move;
2535
2536      /*
2537       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
2538       * 128-bit move because we won't get another chance to try to align. If there are more than
2539       * 4 registers left to move, consider doing a 128-bit move only if either src or dest is aligned.
2540       * We do this because we could potentially do a smaller move to align.
2541       */
2542      if (regs_left_to_pass_via_stack == 4 ||
2543          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
2544        // Moving 128-bits via xmm register.
2545        bytes_to_move = sizeof(uint32_t) * 4;
2546
2547        // Allocate a free xmm temp. Since we are working through the calling sequence,
2548        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
2549        // there are no free registers.
2550        RegStorage temp = AllocTempDouble();
2551
2552        LIR* ld1 = nullptr;
2553        LIR* ld2 = nullptr;
2554        LIR* st1 = nullptr;
2555        LIR* st2 = nullptr;
2556
2557        /*
2558         * The logic is similar for both loads and stores. If we have 16-byte alignment,
2559         * do an aligned move. If we have 8-byte alignment, then do the move in two
2560         * parts. This approach prevents possible cache line splits. Finally, fall back
2561         * to doing an unaligned move. In most cases we likely won't split the cache
2562         * line but we cannot prove it and thus take a conservative approach.
2563         */
2564        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
2565        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
2566
2567        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2568        if (src_is_16b_aligned) {
2569          ld1 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset, kMovA128FP);
2570        } else if (src_is_8b_aligned) {
2571          ld1 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset, kMovLo128FP);
2572          ld2 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset + (bytes_to_move >> 1),
2573                            kMovHi128FP);
2574        } else {
2575          ld1 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset, kMovU128FP);
2576        }
2577
2578        if (dest_is_16b_aligned) {
2579          st1 = OpMovMemReg(rs_rX86_SP, current_dest_offset, temp, kMovA128FP);
2580        } else if (dest_is_8b_aligned) {
2581          st1 = OpMovMemReg(rs_rX86_SP, current_dest_offset, temp, kMovLo128FP);
2582          st2 = OpMovMemReg(rs_rX86_SP, current_dest_offset + (bytes_to_move >> 1),
2583                            temp, kMovHi128FP);
2584        } else {
2585          st1 = OpMovMemReg(rs_rX86_SP, current_dest_offset, temp, kMovU128FP);
2586        }
2587
2588        // TODO If we could keep track of aliasing information for memory accesses that are wider
2589        // than 64-bit, we wouldn't need to set up a barrier.
2590        if (ld1 != nullptr) {
2591          if (ld2 != nullptr) {
2592            // For 64-bit load we can actually set up the aliasing information.
2593            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
2594            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
2595          } else {
2596            // Set barrier for 128-bit load.
2597            ld1->u.m.def_mask = &kEncodeAll;
2598          }
2599        }
2600        if (st1 != nullptr) {
2601          if (st2 != nullptr) {
2602            // For 64-bit store we can actually set up the aliasing information.
2603            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
2604            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
2605          } else {
2606            // Set barrier for 128-bit store.
2607            st1->u.m.def_mask = &kEncodeAll;
2608          }
2609        }
2610
2611        // Free the temporary used for the data movement.
2612        FreeTemp(temp);
2613      } else {
2614        // Moving 32-bits via general purpose register.
2615        bytes_to_move = sizeof(uint32_t);
2616
2617        // Instead of allocating a new temp, simply reuse one of the registers being used
2618        // for argument passing.
2619        RegStorage temp = TargetReg(kArg3, false);
2620
2621        // Now load the argument VR and store to the outs.
2622        Load32Disp(rs_rX86_SP, current_src_offset, temp);
2623        Store32Disp(rs_rX86_SP, current_dest_offset, temp);
2624      }
2625
2626      current_src_offset += bytes_to_move;
2627      current_dest_offset += bytes_to_move;
2628      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
2629    }
2630    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
2631  }
2632
2633  // Now handle the remaining arguments that were not mapped to registers.
2634  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
2635    RegStorage regSingle = TargetReg(kArg2, false);
2636    RegStorage regWide = TargetReg(kArg3, true);
2637    for (int i = start_index;
2638         i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) {
2639      RegLocation rl_arg = info->args[i];
2640      rl_arg = UpdateRawLoc(rl_arg);
2641      RegStorage reg = in_to_reg_storage_mapping.Get(i);
2642      if (!reg.Valid()) {
2643        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
2644
2645        {
2646          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2647          if (rl_arg.wide) {
2648            if (rl_arg.location == kLocPhysReg) {
2649              StoreBaseDisp(rs_rX86_SP, out_offset, rl_arg.reg, k64, kNotVolatile);
2650            } else {
2651              LoadValueDirectWideFixed(rl_arg, regWide);
2652              StoreBaseDisp(rs_rX86_SP, out_offset, regWide, k64, kNotVolatile);
2653            }
2654          } else {
2655            if (rl_arg.location == kLocPhysReg) {
2656              StoreBaseDisp(rs_rX86_SP, out_offset, rl_arg.reg, k32, kNotVolatile);
2657            } else {
2658              LoadValueDirectFixed(rl_arg, regSingle);
2659              StoreBaseDisp(rs_rX86_SP, out_offset, regSingle, k32, kNotVolatile);
2660            }
2661          }
2662        }
2663        call_state = next_call_insn(cu_, info, call_state, target_method,
2664                                    vtable_idx, direct_code, direct_method, type);
2665      }
2666      if (rl_arg.wide) {
2667        i++;
2668      }
2669    }
2670  }
2671
2672  // Finish with mapped registers
2673  for (int i = start_index; i <= last_mapped_in; i++) {
2674    RegLocation rl_arg = info->args[i];
2675    rl_arg = UpdateRawLoc(rl_arg);
2676    RegStorage reg = in_to_reg_storage_mapping.Get(i);
2677    if (reg.Valid()) {
2678      if (rl_arg.wide) {
2679        LoadValueDirectWideFixed(rl_arg, reg);
2680      } else {
2681        LoadValueDirectFixed(rl_arg, reg);
2682      }
2683      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
2684                               direct_code, direct_method, type);
2685    }
2686    if (rl_arg.wide) {
2687      i++;
2688    }
2689  }
2690
2691  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
2692                           direct_code, direct_method, type);
2693  if (pcrLabel) {
2694    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
2695      *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
2696    } else {
2697      *pcrLabel = nullptr;
2698      // In lieu of generating a check for kArg1 being null, we need to
2699      // perform a load when doing implicit checks.
2700      RegStorage tmp = AllocTemp();
2701      Load32Disp(TargetRefReg(kArg1), 0, tmp);
2702      MarkPossibleNullPointerException(info->opt_flags);
2703      FreeTemp(tmp);
2704    }
2705  }
2706  return call_state;
2707}
2708
2709}  // namespace art
2710