// utility_arm64.cc, revision b504d2f89fdd5c01816bcbad752797cb78de0e99
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"

namespace art {

/* This file contains codegen for the A64 ISA. */

int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
   *
   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
   */

  // bits[18..0] are cleared.
  if ((bits & 0x0007ffff) != 0)
    return -1;

  // bits[29..25] are all set or all cleared.
  uint32_t b_pattern = (bits >> 16) & 0x3e00;
  if (b_pattern != 0 && b_pattern != 0x3e00)
    return -1;

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
    return -1;

  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}
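
// For example, 2.0f (bits 0x40000000) encodes to 0x00 and 1.0f (bits
// 0x3f800000) encodes to 0x70: bit 7 of the result comes from the sign bit,
// bit 6 from bit 29 (the replicated exponent bit 'b'), and bits 5..0 from
// cdefgh in the pattern above.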

int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   *   0000.0000.0000.0000.0000.0000.0000.0000
   *
   * where B = not(b).
   */

  // bits[47..0] are cleared.
  if ((bits & UINT64_C(0xffffffffffff)) != 0)
    return -1;

  // bits[61..54] are all set or all cleared.
  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
  if (b_pattern != 0 && b_pattern != 0x3fc0)
    return -1;

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
    return -1;

  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}
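
// As in the single-precision case, 2.0 (bits 0x4000000000000000) encodes to
// 0x00 and 1.0 (bits 0x3ff0000000000000) encodes to 0x70.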

size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
  bool opcode_is_wide = IS_WIDE(lir->opcode);
  ArmOpcode opcode = UNWIDE(lir->opcode);
  DCHECK(!IsPseudoLirOp(opcode));
  const ArmEncodingMap *encoder = &EncodingMap[opcode];
  uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
  return (bits >> 30);
}

size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
  size_t offset = lir->operands[2];
  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
  if (check_flags & SCALED_OFFSET_X0) {
    DCHECK(check_flags & IS_TERTIARY_OP);
    offset = offset * (1 << GetLoadStoreSize(lir));
  }
  return offset;
}

LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
  DCHECK(r_dest.IsSingle());
  if (value == 0) {
    return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
  } else {
    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
    if (encoded_imm >= 0) {
      return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
    }
  }

  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
  if (data_target == NULL) {
    // Wide, as we need 8B alignment.
    data_target = AddWideData(&literal_list_, value, 0);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
  DCHECK(r_dest.IsDouble());
  if (value == 0) {
    return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
  } else {
    int32_t encoded_imm = EncodeImmDouble(value);
    if (encoded_imm >= 0) {
      return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
    }
  }

  // No short form - load from the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

static int CountLeadingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
}

static int CountTrailingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
}

static int CountSetBits(bool is_wide, uint64_t value) {
  return ((is_wide) ?
          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
}

/**
 * @brief Try encoding an immediate in the form required by logical instructions.
 *
 * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
 * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
 *   32-bit if @p is_wide is false.
 * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
 * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
 */
int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
  unsigned n, imm_s, imm_r;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //  N   imms    immr    size        S             R
  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  // (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //

  // 1. If the value has all set or all clear bits, it can't be encoded.
  if (value == 0 || value == ~UINT64_C(0) ||
      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
    return -1;
  }

  unsigned lead_zero  = CountLeadingZeros(is_wide, value);
  unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
  unsigned trail_zero = CountTrailingZeros(is_wide, value);
  unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
  unsigned set_bits   = CountSetBits(is_wide, value);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  unsigned width = (is_wide) ? 64 : 32;
  int imm_s_fixed = (is_wide) ? -128 : -64;
  int imm_s_mask = 0x3f;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      n = 0;
      imm_s = 0x3C;
      imm_r = (value & 3) - 1;
      break;
    }

    n = (width == 64) ? 1 : 0;
    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      imm_r = 0;
    } else {
      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is
    //    equal to the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      break;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    //    value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      break;
    }

    // 5. If the most-significant half of the bitwise value is equal to
    //    the least-significant half, return to step 2 using the
    //    least-significant half of the value.
    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
    return -1;
  }

  return (n << 12 | imm_r << 6 | imm_s);
}
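
// For example, EncodeLogicalImmediate(false, 0x0f0f0f0f) halves the width
// twice (32 -> 16 -> 8) and then stops at step 3, returning 0x033
// (N=0, immr=0, imms=0b110011): an 8-bit element with four set bits, not
// rotated, replicated across the 32-bit register.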

// Maximum number of instructions to use for encoding the immediate.
static const int max_num_ops_per_const_load = 2;

/**
 * @brief Return the number of fast halfwords in the given uint64_t integer.
 * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
 *   number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
 *   a more accurate description.
 * @param value The input 64-bit integer.
 * @return Return @c retval such that (retval & 0x7) is the maximum between n and m, where n is
 *   the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits
 *   set (0xffff). Additionally (retval & 0x8) is set when m > n.
 */
static int GetNumFastHalfWords(uint64_t value) {
  unsigned int num_0000_halfwords = 0;
  unsigned int num_ffff_halfwords = 0;
  for (int shift = 0; shift < 64; shift += 16) {
    uint16_t halfword = static_cast<uint16_t>(value >> shift);
    if (halfword == 0)
      num_0000_halfwords++;
    else if (halfword == UINT16_C(0xffff))
      num_ffff_halfwords++;
  }
  if (num_0000_halfwords >= num_ffff_halfwords) {
    DCHECK_LE(num_0000_halfwords, 4U);
    return num_0000_halfwords;
  } else {
    DCHECK_LE(num_ffff_halfwords, 4U);
    return num_ffff_halfwords | 0x8;
  }
}
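
// For example, GetNumFastHalfWords(0xffff00001234ffff) returns 0xa: two of the
// four halfwords are 0xffff (retval & 0x7 == 2) and they outnumber the
// all-zero halfwords (bit 3 is set).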

// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
// constant is considered for promotion. If the constant is "inexpensive" then the promotion
// algorithm will give it a low priority for promotion, even when it is referenced many times in
// the code.

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
  // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
  // We therefore return true and give it a low priority for promotion.
  return true;
}

bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
  return EncodeImmSingle(value) >= 0;
}

bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
  int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    return true;
  }
  return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
}
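
// For example, 0x123400000000ffff needs only two movz/movk-style operations
// (two halfwords differ from the all-zero background), so it is inexpensive,
// whereas 0x123456789abcdef0 has four such halfwords and is not a valid
// logical immediate, so it is considered expensive.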

bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
  return EncodeImmDouble(value) >= 0;
}

// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
// when one of the operands is an immediate (e.g. register version or immediate version of add).

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
  switch (opcode) {
  case Instruction::IF_EQ:
  case Instruction::IF_NE:
  case Instruction::IF_LT:
  case Instruction::IF_GE:
  case Instruction::IF_GT:
  case Instruction::IF_LE:
  case Instruction::ADD_INT:
  case Instruction::ADD_INT_2ADDR:
  case Instruction::SUB_INT:
  case Instruction::SUB_INT_2ADDR:
    // The code below is consistent with the implementation of OpRegRegImm().
    {
      uint32_t abs_value = (value == INT_MIN) ? value : std::abs(value);
      if (abs_value < 0x1000) {
        return true;
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        return true;
      }
      return false;
    }
  case Instruction::SHL_INT:
  case Instruction::SHL_INT_2ADDR:
  case Instruction::SHR_INT:
  case Instruction::SHR_INT_2ADDR:
  case Instruction::USHR_INT:
  case Instruction::USHR_INT_2ADDR:
    return true;
  case Instruction::AND_INT:
  case Instruction::AND_INT_2ADDR:
  case Instruction::AND_INT_LIT16:
  case Instruction::AND_INT_LIT8:
  case Instruction::OR_INT:
  case Instruction::OR_INT_2ADDR:
  case Instruction::OR_INT_LIT16:
  case Instruction::OR_INT_LIT8:
  case Instruction::XOR_INT:
  case Instruction::XOR_INT_2ADDR:
  case Instruction::XOR_INT_LIT16:
  case Instruction::XOR_INT_LIT8:
    if (value == 0 || value == INT32_C(-1)) {
      return true;
    }
    return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
  default:
    return false;
  }
}
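
// For example, an add/sub immediate such as 0x123000 fits the shifted 12-bit
// form ("add wd, wn, #0x123, lsl #12") and is inexpensive, while 0x1001 fits
// neither form and would need a temporary register. For and/or/xor, 0xff is a
// valid logical immediate and is therefore inexpensive.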

/*
 * Load an immediate using one single instruction when possible; otherwise
 * use a pair of movz and movk instructions.
 *
 * No additional register-clobbering operation is performed. Use this version when
 * 1) r_dest is freshly returned from AllocTemp or
 * 2) The codegen is under fixed register usage
 */
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
  LIR* res;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValue(r_dest, value);
  }

  if (r_dest.Is64Bit()) {
    return LoadConstantWide(r_dest, value);
  }

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  // Compute how many movk, movz instructions are needed to load the value.
  uint16_t high_bits = High16Bits(value);
  uint16_t low_bits = Low16Bits(value);

  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);

  if (LIKELY(low_fast || high_fast)) {
    // 1 instruction is enough to load the immediate.
    if (LIKELY(low_bits == high_bits)) {
      // Value is either 0 or -1: we can just use wzr.
      ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
    } else {
      uint16_t uniform_bits, useful_bits;
      int shift;

      if (LIKELY(high_fast)) {
        shift = 0;
        uniform_bits = high_bits;
        useful_bits = low_bits;
      } else {
        shift = 1;
        uniform_bits = low_bits;
        useful_bits = high_bits;
      }

      if (UNLIKELY(uniform_bits != 0)) {
        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
      } else {
        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
      }
    }
  } else {
    // movk, movz require 2 instructions. Try detecting logical immediates.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
    if (log_imm >= 0) {
      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
    } else {
      // Use 2 instructions.
      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
    }
  }

  return res;
}
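
// Some illustrative cases: 0xffff1234 has a "fast" high halfword, so a single
// "movn wd, #0xedcb" suffices; 0x0f0f0f0f is a logical immediate and becomes
// "orr wd, wzr, #0x0f0f0f0f"; 0x12345678 needs the two-instruction sequence
// "movz wd, #0x5678" + "movk wd, #0x1234, lsl #16".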

// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
  if (r_dest.IsFloat()) {
    return LoadFPConstantValueWide(r_dest, value);
  }

  DCHECK(r_dest.Is64Bit());

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
    // value is either 0 or -1: we can just use xzr.
    ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
    return NewLIR2(opcode, r_dest.GetReg(), rxzr);
  }

  // At least one of value's halfwords is neither 0x0 nor 0xffff: find out how many.
  uint64_t uvalue = static_cast<uint64_t>(value);
  int num_fast_halfwords = GetNumFastHalfWords(uvalue);
  int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
  bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;

  if (num_slow_halfwords > 1) {
    // A single movz/movn is not enough. Try the logical immediate route.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
    if (log_imm >= 0) {
      return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
    }
  }

  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    // We can encode the number using a movz/movn followed by one or more movk.
    ArmOpcode op;
    uint16_t background;
    LIR* res = nullptr;

    // Decide whether to use a movz or a movn.
    if (more_ffff_halfwords) {
      op = WIDE(kA64Movn3rdM);
      background = 0xffff;
    } else {
      op = WIDE(kA64Movz3rdM);
      background = 0;
    }

    // Emit the first instruction (movz, movn).
    int shift;
    for (shift = 0; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
        break;
      }
    }

    // Emit the movk instructions.
    for (shift++; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
      }
    }
    return res;
  }

  // Use the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
                    r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(res);
  return res;
}
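
// For example, 0xffffffff12345678 has two halfwords that differ from the
// 0xffff background, so it is emitted as "movn xd, #0xa987" (yielding
// 0xffffffffffff5678) followed by "movk xd, #0x1234, lsl #16"; constants with
// more slow halfwords that are not logical immediates go to the literal pool.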

LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
  res->target = target;
  return res;
}

LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
                        0 /* offset to be patched */);
  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
  ArmOpcode opcode = kA64Brk1d;
  switch (op) {
    case kOpBlx:
      opcode = kA64Blr1x;
      break;
    // TODO(Arm64): port kThumbBx.
    // case kOpBx:
    //   opcode = kThumbBx;
    //   break;
    default:
      LOG(FATAL) << "Bad opcode " << op;
  }
  return NewLIR1(opcode, r_dest_src.GetReg());
}

LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3rro;
      break;
    case kOpCmp:
      opcode = kA64Cmp3rro;
      break;
    case kOpMov:
      opcode = kA64Mov2rr;
      break;
    case kOpMvn:
      opcode = kA64Mvn2rr;
      break;
    case kOpNeg:
      opcode = kA64Neg3rro;
      break;
    case kOpTst:
      opcode = kA64Tst3rro;
      break;
    case kOpRev:
      DCHECK_EQ(shift, 0);
      // Binary, but rm is encoded twice.
      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      break;
    case kOpRevsh:
      // Binary, but rm is encoded twice.
      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
      break;
    case kOp2Byte:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use sbfm directly.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
    case kOp2Short:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // For now we use sbfm rather than its alias, sbfx.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    case kOp2Char:
      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use ubfm directly.
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    default:
      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtShift) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
                                  A64RegExtEncodings ext, uint8_t amount) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3Rre;
      break;
    case kOpCmp:
      opcode = kA64Cmp3Rre;
      break;
    case kOpAdd:
      // Note: intentional fallthrough
    case kOpSub:
      return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtExtend) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
                     EncodeExtend(ext, amount));
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
  /* RegReg operations with SP in first parameter need extended register instruction form.
   * Only CMN, CMP, ADD & SUB instructions are implemented.
   */
  if (r_dest_src1 == rs_sp) {
    return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
  } else {
    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
  }
}

LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                    RegStorage r_src2, int shift) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4rrro;
      break;
    case kOpSub:
      opcode = kA64Sub4rrro;
      break;
    // case kOpRsub:
    //   opcode = kA64RsubWWW;
    //   break;
    case kOpAdc:
      opcode = kA64Adc3rrr;
      break;
    case kOpAnd:
      opcode = kA64And4rrro;
      break;
    case kOpXor:
      opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      opcode = kA64Mul3rrr;
      break;
    case kOpDiv:
      opcode = kA64Sdiv3rrr;
      break;
    case kOpOr:
      opcode = kA64Orr4rrro;
      break;
    case kOpSbc:
      opcode = kA64Sbc3rrr;
      break;
    case kOpLsl:
      opcode = kA64Lsl3rrr;
      break;
    case kOpLsr:
      opcode = kA64Lsr3rrr;
      break;
    case kOpAsr:
      opcode = kA64Asr3rrr;
      break;
    case kOpRor:
      opcode = kA64Ror3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  // The instructions above belong to two kinds:
  // - 4-operand instructions, where the last operand is a shift/extend immediate,
  // - 3-operand instructions with no shift/extend.
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    DCHECK(!IsExtendEncoding(shift));
    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
  }
}

LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4RRre;
      break;
    case kOpSub:
      opcode = kA64Sub4RRre;
      break;
    default:
      LOG(FATAL) << "Unimplemented opcode: " << op;
      break;
  }
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;

  if (r_dest.Is64Bit()) {
    CHECK(r_src1.Is64Bit());

    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
    // Note: this is not according to aarch64 specifications, but our encoding.
    if (!r_src2.Is64Bit()) {
      r_src2 = As64BitReg(r_src2);
    }
  } else {
    CHECK(!r_src1.Is64Bit());
    CHECK(!r_src2.Is64Bit());
  }

  // Sanity checks.
  //    1) Amount is in the range 0..4
  CHECK_LE(amount, 4);

  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
                 EncodeExtend(ext, amount));
}

LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
}

LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
  LIR* res;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  bool is_logical = false;
  bool is_wide = r_dest.Is64Bit();
  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
  int info = 0;

  switch (op) {
    case kOpLsl: {
      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
      // For now, we just use ubfm directly.
      int max_value = (is_wide) ? 63 : 31;
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
                     (-value) & max_value, max_value - value);
    }
    case kOpLsr:
      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAsr:
      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpRor:
      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
      // For now, we just use extr directly.
      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
                     value);
    case kOpAdd:
      neg = !neg;
      // Note: intentional fallthrough
    case kOpSub:
      // Add and sub below read/write sp rather than xzr.
      if (abs_value < 0x1000) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
      } else {
        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
      }
      break;
    case kOpAdc:
      alt_opcode = kA64Adc3rrr;
      break;
    case kOpSbc:
      alt_opcode = kA64Sbc3rrr;
      break;
    case kOpOr:
      is_logical = true;
      opcode = kA64Orr3Rrl;
      alt_opcode = kA64Orr4rrro;
      break;
    case kOpAnd:
      is_logical = true;
      opcode = kA64And3Rrl;
      alt_opcode = kA64And4rrro;
      break;
    case kOpXor:
      is_logical = true;
      opcode = kA64Eor3Rrl;
      alt_opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
      alt_opcode = kA64Mul3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

  if (is_logical) {
    int log_imm = EncodeLogicalImmediate(is_wide, value);
    if (log_imm >= 0) {
      return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
    } else {
      // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
      // to a - possibly negated - assignment.
      if (value == 0) {
        switch (op) {
          case kOpOr:
          case kOpXor:
            // Or/Xor by zero reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // And by zero reduces to a `mov rdest, xzr'.
            DCHECK(op == kOpAnd);
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      } else if (value == INT64_C(-1)
                 || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
        switch (op) {
          case kOpAnd:
            // And by -1 reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          case kOpXor:
            // Xor by -1 reduces to an `mvn rdest, rsrc'.
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // Or by -1 reduces to a `mvn rdest, xzr'.
            DCHECK(op == kOpOr);
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      }
    }
  }

  RegStorage r_scratch;
  if (is_wide) {
    r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, value);
  } else {
    r_scratch = AllocTemp();
    LoadConstant(r_scratch, value);
  }
  if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
    res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
  else
    res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
  FreeTemp(r_scratch);
  return res;
}
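
// A couple of examples: OpRegRegImm(kOpAdd, w0, w1, 0x123000) uses the shifted
// 12-bit form "add w0, w1, #0x123, lsl #12"; OpRegRegImm(kOpAnd, w0, w1, 0xff)
// encodes 0xff as a logical immediate, giving "and w0, w1, #0xff"; immediates
// that fit neither form are loaded into a scratch register first.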

LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode neg_opcode = kA64Brk1d;
  bool shift;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;

  if (LIKELY(abs_value < 0x1000)) {
    // abs_value is a 12-bit immediate.
    shift = false;
  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    // abs_value is a shifted 12-bit immediate.
    shift = true;
    abs_value >>= 12;
  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
    // Note: It is better to use two ADD/SUB instead of loading a number to a temp register.
    // This works for both normal registers and SP.
    // For a frame size == 0x2468, it will be encoded as:
    //   sub sp, #0x2000
    //   sub sp, #0x468
    if (neg) {
      op = (op == kOpAdd) ? kOpSub : kOpAdd;
    }
    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
  } else {
    RegStorage r_tmp;
    LIR* res;
    if (IS_WIDE(wide)) {
      r_tmp = AllocTempWide();
      res = LoadConstantWide(r_tmp, value);
    } else {
      r_tmp = AllocTemp();
      res = LoadConstant(r_tmp, value);
    }
    OpRegReg(op, r_dest_src1, r_tmp);
    FreeTemp(r_tmp);
    return res;
  }

  switch (op) {
    case kOpAdd:
      neg_opcode = kA64Sub4RRdT;
      opcode = kA64Add4RRdT;
      break;
    case kOpSub:
      neg_opcode = kA64Add4RRdT;
      opcode = kA64Sub4RRdT;
      break;
    case kOpCmp:
      neg_opcode = kA64Cmn3RdT;
      opcode = kA64Cmp3RdT;
      break;
    default:
      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
      break;
  }

  if (UNLIKELY(neg))
    opcode = neg_opcode;

  if (EncodingMap[opcode].flags & IS_QUAD_OP)
    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
                   (shift) ? 1 : 0);
  else
    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}

int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
  DCHECK_EQ(shift_type & 0x3, shift_type);
  DCHECK_EQ(amount & 0x3f, amount);
  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}

int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
  DCHECK_EQ(extend_type & 0x7, extend_type);
  DCHECK_EQ(amount & 0x7, amount);
  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
}

bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
  return ((1 << 6) & encoded_value) != 0;
}
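
// Note on the packing: EncodeShift() stores the shift type in bits [8:7] and
// the amount in bits [5:0], so bit 6 is always clear, while EncodeExtend()
// always sets bit 6. IsExtendEncoding() therefore only needs to test bit 6 to
// tell the two operand kinds apart.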

LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                   int scale, OpSize size) {
  LIR* load;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset load (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_dest.IsFloat()) {
    if (r_dest.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Ldr4fXxG);
    } else {
      DCHECK(r_dest.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Ldr4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:
    case kWord:
    case k64:
      r_dest = Check64BitReg(r_dest);
      opcode = WIDE(kA64Ldr4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldr4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrh4wXxd;
      expected_scale = 1;
      break;
    case kSignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsh4rXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrb3wXx;
      break;
    case kSignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsb3rXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. ldrb, ldrsb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    DCHECK(scale == 0 || scale == expected_scale);
    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  return load;
}
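
// For example, a 32-bit load with scale 2 yields
// "ldr w_dest, [x_base, x_index, lsl #2]", while a byte load always uses the
// unscaled form "ldrb w_dest, [x_base, x_index]".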

LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale) {
  return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
}

LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                    int scale, OpSize size) {
  LIR* store;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset store (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_src.IsFloat()) {
    if (r_src.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Str4fXxG);
    } else {
      DCHECK(r_src.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Str4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      opcode = WIDE(kA64Str4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      opcode = kA64Str4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strh4wXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
    case kSignedByte:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strb3wXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. strb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                    (scale != 0) ? 1 : 0);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale) {
  return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
}

/*
 * Load value from base + displacement.  Optionally perform null check
 * on base (which must have an associated s_reg and MIR).  If not
 * performing null check, incoming MIR can be null.
 */
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                    OpSize size) {
  LIR* load = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_dest = Check64BitReg(r_dest);
      scale = 3;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsDouble());
        opcode = FWIDE(kA64Ldr3fXD);
        alt_opcode = FWIDE(kA64Ldur3fXd);
      } else {
        opcode = WIDE(kA64Ldr3rXD);
        alt_opcode = WIDE(kA64Ldur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      scale = 2;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsSingle());
        opcode = kA64Ldr3fXD;
      } else {
        opcode = kA64Ldr3rXD;
      }
      break;
    case kUnsignedHalf:
      scale = 1;
      opcode = kA64Ldrh3wXF;
      break;
    case kSignedHalf:
      scale = 1;
      opcode = kA64Ldrsh3rXF;
      break;
    case kUnsignedByte:
      opcode = kA64Ldrb3wXd;
      break;
    case kSignedByte:
      opcode = kA64Ldrsb3rXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled load.
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled load.
    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: in future may need to differentiate Dalvik accesses w/ spills
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
  }
  return load;
}
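
// For example, a 32-bit load at displacement 0x104 uses the scaled form
// "ldr w_dest, [x_base, #0x104]" (scaled_disp == 0x41); a 64-bit load at the
// unaligned displacement 0xfc can still use the unscaled form
// "ldur x_dest, [x_base, #0xfc]"; anything else materializes the displacement
// in a temp register and goes through LoadBaseIndexed().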

LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, VolatileKind is_volatile) {
  // LoadBaseDisp() will emit correct insn for atomic load on arm64
  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().

  LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // TODO: This should generate an acquire load instead of the barrier.
    GenMemBarrier(kLoadAny);
  }

  return load;
}

LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                               VolatileKind is_volatile) {
  return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                     OpSize size) {
  LIR* store = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      scale = 3;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsDouble());
        opcode = FWIDE(kA64Str3fXD);
        alt_opcode = FWIDE(kA64Stur3fXd);
      } else {
        opcode = FWIDE(kA64Str3rXD);
        alt_opcode = FWIDE(kA64Stur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      scale = 2;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsSingle());
        opcode = kA64Str3fXD;
      } else {
        opcode = kA64Str3rXD;
      }
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      scale = 1;
      opcode = kA64Strh3wXF;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kA64Strb3wXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled store.
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled store.
    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: In future, may need to differentiate Dalvik & spill accesses.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
  }
  return store;
}

LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size, VolatileKind is_volatile) {
  // TODO: This should generate a release store and no barriers.
  if (UNLIKELY(is_volatile == kVolatile)) {
    // Ensure that prior accesses become visible to other threads first.
    GenMemBarrier(kAnyStore);
  }

  // StoreBaseDisp() will emit correct insn for atomic store on arm64
  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().

  LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // Preserve order with respect to any subsequent volatile loads.
    // We need StoreLoad, but that generally requires the most expensive barrier.
    GenMemBarrier(kAnyAny);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                VolatileKind is_volatile) {
  return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
  return OpReg(op, r_tgt);
}

}  // namespace art