/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_arm64.h"

#include "arm64_lir.h"
#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"

namespace art {

/* This file contains codegen for the A64 ISA. */

int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
   *
   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
   */

  // bits[18..0] are cleared.
  if ((bits & 0x0007ffff) != 0)
    return -1;

  // bits[29..25] are all set or all cleared.
  uint32_t b_pattern = (bits >> 16) & 0x3e00;
  if (b_pattern != 0 && b_pattern != 0x3e00)
    return -1;

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
    return -1;

  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}
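
// Worked example for EncodeImmSingle() above (informal, derived from the
// checks in the function): 2.0f is 0x40000000 and encodes to imm8 == 0x00;
// 1.5f is 0x3fc00000 and encodes to imm8 == 0x78. A value such as 0x3f800001
// (just above 1.0f) has low mantissa bits set and yields -1.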

int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   *   0000.0000.0000.0000.0000.0000.0000.0000
   *
   * where B = not(b).
   */

  // bits[47..0] are cleared.
  if ((bits & UINT64_C(0xffffffffffff)) != 0)
    return -1;

  // bits[61..54] are all set or all cleared.
  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
  if (b_pattern != 0 && b_pattern != 0x3fc0)
    return -1;

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
    return -1;

  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}
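
// Worked example for EncodeImmDouble() above (informal): 2.0 is
// 0x4000000000000000 and encodes to imm8 == 0x00, while 0.5 is
// 0x3fe0000000000000 and encodes to imm8 == 0x60. Any value with a nonzero
// bit in positions [47..0] yields -1.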

size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
  bool opcode_is_wide = IS_WIDE(lir->opcode);
  A64Opcode opcode = UNWIDE(lir->opcode);
  DCHECK(!IsPseudoLirOp(opcode));
  const A64EncodingMap *encoder = &EncodingMap[opcode];
  uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
  return (bits >> 30);
}

size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
  size_t offset = lir->operands[2];
  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
  if (check_flags & SCALED_OFFSET_X0) {
    DCHECK(check_flags & IS_TERTIARY_OP);
    offset = offset * (1 << GetLoadStoreSize(lir));
  }
  return offset;
}

LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
  DCHECK(r_dest.IsSingle());
  if (value == 0) {
    return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
  } else {
    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
    if (encoded_imm >= 0) {
      return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
    }
  }

  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
  if (data_target == nullptr) {
    // Wide, as we need 8B alignment.
    data_target = AddWideData(&literal_list_, value, 0);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
  DCHECK(r_dest.IsDouble());
  if (value == 0) {
    return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
  } else {
    int32_t encoded_imm = EncodeImmDouble(value);
    if (encoded_imm >= 0) {
      return NewLIR2(WIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
    }
  }

  // No short form - load from the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == nullptr) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2fp),
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

static int CountLeadingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
}

static int CountTrailingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
}

static int CountSetBits(bool is_wide, uint64_t value) {
  return ((is_wide) ?
          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
}

/**
 * @brief Try encoding an immediate in the form required by logical instructions.
 *
 * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
 * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
 *   32-bit if @p is_wide is false.
 * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
 * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
 */
int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
  unsigned n, imm_s, imm_r;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //  N   imms    immr    size        S             R
  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  // (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //

  // 1. If the value has all set or all clear bits, it can't be encoded.
  if (value == 0 || value == ~UINT64_C(0) ||
      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
    return -1;
  }

  unsigned lead_zero  = CountLeadingZeros(is_wide, value);
  unsigned lead_one   = CountLeadingZeros(is_wide, ~value);
  unsigned trail_zero = CountTrailingZeros(is_wide, value);
  unsigned trail_one  = CountTrailingZeros(is_wide, ~value);
  unsigned set_bits   = CountSetBits(is_wide, value);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  unsigned width = (is_wide) ? 64 : 32;
  int imm_s_fixed = (is_wide) ? -128 : -64;
  int imm_s_mask = 0x3f;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      n = 0;
      imm_s = 0x3C;
      imm_r = (value & 3) - 1;
      break;
    }

    n = (width == 64) ? 1 : 0;
    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      imm_r = 0;
    } else {
      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is
    //    equal to the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      break;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    //    value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      break;
    }

    // 5. If the most-significant half of the bitwise value is equal to
    //    the least-significant half, return to step 2 using the
    //    least-significant half of the value.
    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
    return -1;
  }

  return (n << 12 | imm_r << 6 | imm_s);
}
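
// Worked example for EncodeLogicalImmediate() above (informal): for the
// 32-bit value 0x000000ff we get lead_zero = 24, trail_zero = 0 and
// set_bits = 8, so step 3 succeeds on the first iteration with n = 0,
// imm_r = 0 and imm_s = 0b000111, i.e. the function returns 0x007 (the
// bitmask encoding used by "and w0, w1, #0xff"). For 0x0000ff00 the same
// pattern is rotated, giving imm_r = 24 and a return value of 0x607.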

// Maximum number of instructions to use for loading an immediate.
static const int max_num_ops_per_const_load = 2;

/**
 * @brief Return the number of fast halfwords in the given uint64_t integer.
 * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
 *   number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
 *   a more accurate description.
 * @param value The input 64-bit integer.
 * @return Return @c retval such that (retval & 0x7) is the maximum between n and m, where n is
 *   the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits
 *   set (0xffff). Additionally (retval & 0x8) is set when m > n.
 */
static int GetNumFastHalfWords(uint64_t value) {
  unsigned int num_0000_halfwords = 0;
  unsigned int num_ffff_halfwords = 0;
  for (int shift = 0; shift < 64; shift += 16) {
    uint16_t halfword = static_cast<uint16_t>(value >> shift);
    if (halfword == 0)
      num_0000_halfwords++;
    else if (halfword == UINT16_C(0xffff))
      num_ffff_halfwords++;
  }
  if (num_0000_halfwords >= num_ffff_halfwords) {
    DCHECK_LE(num_0000_halfwords, 4U);
    return num_0000_halfwords;
  } else {
    DCHECK_LE(num_ffff_halfwords, 4U);
    return num_ffff_halfwords | 0x8;
  }
}
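
// Worked example for GetNumFastHalfWords() above (informal): for
// 0x0000ffff00001234 the halfwords are 0x1234, 0x0000, 0xffff and 0x0000,
// so the function returns 2 (two zero halfwords, bit 3 clear). For
// 0xffffffffffff1234 it returns 0xb: three 0xffff halfwords, with bit 3 set
// to indicate that a movn-based sequence is preferable.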

// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
// constant is considered for promotion. If the constant is "inexpensive" then the promotion
// algorithm will give it a low priority for promotion, even when it is referenced many times in
// the code.

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value ATTRIBUTE_UNUSED) {
  // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
  // We therefore return true and give it a low priority for promotion.
  return true;
}

bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
  return EncodeImmSingle(value) >= 0;
}

bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
  int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    return true;
  }
  return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
}

bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
  return EncodeImmDouble(value) >= 0;
}

// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
// when one of the operands is an immediate (e.g. register version or immediate version of add).

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
  switch (opcode) {
  case Instruction::IF_EQ:
  case Instruction::IF_NE:
  case Instruction::IF_LT:
  case Instruction::IF_GE:
  case Instruction::IF_GT:
  case Instruction::IF_LE:
  case Instruction::ADD_INT:
  case Instruction::ADD_INT_2ADDR:
  case Instruction::SUB_INT:
  case Instruction::SUB_INT_2ADDR:
    // The code below is consistent with the implementation of OpRegRegImm().
    {
      uint32_t abs_value = (value == INT_MIN) ? value : std::abs(value);
      if (abs_value < 0x1000) {
        return true;
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        return true;
      }
      return false;
    }
  case Instruction::SHL_INT:
  case Instruction::SHL_INT_2ADDR:
  case Instruction::SHR_INT:
  case Instruction::SHR_INT_2ADDR:
  case Instruction::USHR_INT:
  case Instruction::USHR_INT_2ADDR:
    return true;
  case Instruction::AND_INT:
  case Instruction::AND_INT_2ADDR:
  case Instruction::AND_INT_LIT16:
  case Instruction::AND_INT_LIT8:
  case Instruction::OR_INT:
  case Instruction::OR_INT_2ADDR:
  case Instruction::OR_INT_LIT16:
  case Instruction::OR_INT_LIT8:
  case Instruction::XOR_INT:
  case Instruction::XOR_INT_2ADDR:
  case Instruction::XOR_INT_LIT16:
  case Instruction::XOR_INT_LIT8:
    if (value == 0 || value == INT32_C(-1)) {
      return true;
    }
    return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
  default:
    return false;
  }
}
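
// Informal examples for the ADD/SUB case above: 0xabc and 0x5000 are both
// "inexpensive" (a plain 12-bit immediate and a 12-bit immediate shifted by
// 12, respectively), while 0x1234 is not: it does not fit in 12 bits and its
// low 12 bits are nonzero.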

/*
 * Load an immediate using a single instruction when possible; otherwise
 * use a pair of movz and movk instructions.
 *
 * No additional register clobbering operation is performed. Use this version when
 * 1) r_dest is freshly returned from AllocTemp, or
 * 2) the codegen is under fixed register usage.
 */
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
  LIR* res;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValue(r_dest, value);
  }

  if (r_dest.Is64Bit()) {
    return LoadConstantWide(r_dest, value);
  }

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  // Compute how many movk, movz instructions are needed to load the value.
  uint16_t high_bits = High16Bits(value);
  uint16_t low_bits = Low16Bits(value);

  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);

  if (LIKELY(low_fast || high_fast)) {
    // 1 instruction is enough to load the immediate.
    if (LIKELY(low_bits == high_bits)) {
      // Value is either 0 or -1: we can just use wzr.
      A64Opcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
    } else {
      uint16_t uniform_bits, useful_bits;
      int shift;

      if (LIKELY(high_fast)) {
        shift = 0;
        uniform_bits = high_bits;
        useful_bits = low_bits;
      } else {
        shift = 1;
        uniform_bits = low_bits;
        useful_bits = high_bits;
      }

      if (UNLIKELY(uniform_bits != 0)) {
        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
      } else {
        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
      }
    }
  } else {
    // A movz/movk pair would take 2 instructions. Try a logical immediate first.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
    if (log_imm >= 0) {
      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
    } else {
      // Use 2 instructions.
      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
    }
  }

  return res;
}
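
// Informal examples of the selection above (register names are arbitrary):
//   0x12340000 -> movz w0, #0x1234, lsl #16   (only the high halfword is useful)
//   0xffff1234 -> movn w0, #0xedcb            (0xffff background, low halfword useful)
//   0x00ff00ff -> orr  w0, wzr, #0x00ff00ff   (logical immediate)
//   0x12345678 -> movz w0, #0x5678 ; movk w0, #0x1234, lsl #16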

// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
  if (r_dest.IsFloat()) {
    return LoadFPConstantValueWide(r_dest, value);
  }

  DCHECK(r_dest.Is64Bit());

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
    // value is either 0 or -1: we can just use xzr.
    A64Opcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
    return NewLIR2(opcode, r_dest.GetReg(), rxzr);
  }

  // value is neither 0 nor -1: find out how many of its halfwords are 0x0 or 0xffff.
  uint64_t uvalue = static_cast<uint64_t>(value);
  int num_fast_halfwords = GetNumFastHalfWords(uvalue);
  int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
  bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;

  if (num_slow_halfwords > 1) {
    // A single movz/movn is not enough. Try the logical immediate route.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
    if (log_imm >= 0) {
      return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
    }
  }

  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    // We can encode the number using a movz/movn followed by one or more movk.
    A64Opcode op;
    uint16_t background;
    LIR* res = nullptr;

    // Decide whether to use a movz or a movn.
    if (more_ffff_halfwords) {
      op = WIDE(kA64Movn3rdM);
      background = 0xffff;
    } else {
      op = WIDE(kA64Movz3rdM);
      background = 0;
    }

    // Emit the first instruction (movz, movn).
    int shift;
    for (shift = 0; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
        break;
      }
    }

    // Emit the movk instructions.
    for (shift++; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
      }
    }
    return res;
  }

  // Use the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == nullptr) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
                    r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(res);
  return res;
}
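
// Informal examples of the wide selection above (register names are arbitrary):
//   0x0000001234560000 -> movz x0, #0x3456, lsl #16 ; movk x0, #0x12, lsl #32
//   0xffffffffffff1234 -> movn x0, #0xedcb   (one slow halfword, 0xffff background)
//   0x5555555555555555 -> orr  x0, xzr, #0x5555555555555555   (logical immediate)
// Anything that needs more than two movz/movn/movk instructions and is not a
// valid logical immediate falls back to the literal pool.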

LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
  res->target = target;
  return res;
}

LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
                        0 /* offset to be patched */);
  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
  A64Opcode opcode = kA64Brk1d;
  switch (op) {
    case kOpBlx:
      opcode = kA64Blr1x;
      break;
    default:
      LOG(FATAL) << "Bad opcode " << op;
  }
  return NewLIR1(opcode, r_dest_src.GetReg());
}

LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3rro;
      break;
    case kOpCmp:
      opcode = kA64Cmp3rro;
      break;
    case kOpMov:
      opcode = kA64Mov2rr;
      break;
    case kOpMvn:
      opcode = kA64Mvn2rr;
      break;
    case kOpNeg:
      opcode = kA64Neg3rro;
      break;
    case kOpTst:
      opcode = kA64Tst3rro;
      break;
    case kOpRev:
      DCHECK_EQ(shift, 0);
      // Binary, but rm is encoded twice.
      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
    case kOpRevsh:
      // Binary, but rm is encoded twice.
      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
    case kOp2Byte:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use sbfm directly.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
    case kOp2Short:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // For now we use sbfm rather than its alias, sbfx.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    case kOp2Char:
      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use ubfm directly.
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    default:
      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtShift) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return nullptr;
}

LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
                                  A64RegExtEncodings ext, uint8_t amount) {
  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3Rre;
      break;
    case kOpCmp:
      opcode = kA64Cmp3Rre;
      break;
    case kOpAdd:
      // Note: intentional fallthrough
    case kOpSub:
      return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      UNREACHABLE();
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtExtend) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
                     EncodeExtend(ext, amount));
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return nullptr;
}

LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
  /* RegReg operations with SP in first parameter need extended register instruction form.
   * Only CMN, CMP, ADD & SUB instructions are implemented.
   */
  if (r_dest_src1 == rs_sp) {
    return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
  } else {
    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
  }
}

LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset,
                               MoveType move_type) {
  UNUSED(r_dest, r_base, offset, move_type);
  UNIMPLEMENTED(FATAL);
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src,
                               MoveType move_type) {
  UNUSED(r_base, offset, r_src, move_type);
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
  UNUSED(op, cc, r_dest, r_src);
  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                    RegStorage r_src2, int shift) {
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4rrro;
      break;
    case kOpSub:
      opcode = kA64Sub4rrro;
      break;
    // case kOpRsub:
    //   opcode = kA64RsubWWW;
    //   break;
    case kOpAdc:
      opcode = kA64Adc3rrr;
      break;
    case kOpAnd:
      opcode = kA64And4rrro;
      break;
    case kOpXor:
      opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      opcode = kA64Mul3rrr;
      break;
    case kOpDiv:
      opcode = kA64Sdiv3rrr;
      break;
    case kOpOr:
      opcode = kA64Orr4rrro;
      break;
    case kOpSbc:
      opcode = kA64Sbc3rrr;
      break;
    case kOpLsl:
      opcode = kA64Lsl3rrr;
      break;
    case kOpLsr:
      opcode = kA64Lsr3rrr;
      break;
    case kOpAsr:
      opcode = kA64Asr3rrr;
      break;
    case kOpRor:
      opcode = kA64Ror3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  // The instructions above belong to two kinds:
  // - 4-operand instructions, where the last operand is a shift/extend immediate,
  // - 3-operand instructions with no shift/extend.
  A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    DCHECK(!IsExtendEncoding(shift));
    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
  }
}

LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4RRre;
      break;
    case kOpSub:
      opcode = kA64Sub4RRre;
      break;
    default:
      UNIMPLEMENTED(FATAL) << "Unimplemented opcode: " << op;
      UNREACHABLE();
  }
  A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;

  if (r_dest.Is64Bit()) {
    CHECK(r_src1.Is64Bit());

    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
    // Note: this is not according to aarch64 specifications, but our encoding.
    if (!r_src2.Is64Bit()) {
      r_src2 = As64BitReg(r_src2);
    }
  } else {
    CHECK(!r_src1.Is64Bit());
    CHECK(!r_src2.Is64Bit());
  }

  // Sanity checks.
  //    1) Amount is in the range 0..4
  CHECK_LE(amount, 4);

  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
                 EncodeExtend(ext, amount));
}

LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
}

LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
  LIR* res;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
  A64Opcode opcode = kA64Brk1d;
  A64Opcode alt_opcode = kA64Brk1d;
  bool is_logical = false;
  bool is_wide = r_dest.Is64Bit();
  A64Opcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
  int info = 0;

  switch (op) {
    case kOpLsl: {
      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
      // For now, we just use ubfm directly.
      int max_value = (is_wide) ? 63 : 31;
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
                     (-value) & max_value, max_value - value);
    }
    case kOpLsr:
      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAsr:
      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpRor:
      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
      // For now, we just use extr directly.
      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
                     value);
    case kOpAdd:
      neg = !neg;
      FALLTHROUGH_INTENDED;
    case kOpSub:
      // Add and sub below read/write sp rather than xzr.
      if (abs_value < 0x1000) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
      } else {
        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
      }
      break;
    case kOpAdc:
      alt_opcode = kA64Adc3rrr;
      break;
    case kOpSbc:
      alt_opcode = kA64Sbc3rrr;
      break;
    case kOpOr:
      is_logical = true;
      opcode = kA64Orr3Rrl;
      alt_opcode = kA64Orr4rrro;
      break;
    case kOpAnd:
      is_logical = true;
      opcode = kA64And3Rrl;
      alt_opcode = kA64And4rrro;
      break;
    case kOpXor:
      is_logical = true;
      opcode = kA64Eor3Rrl;
      alt_opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
      alt_opcode = kA64Mul3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

  if (is_logical) {
    int log_imm = EncodeLogicalImmediate(is_wide, value);
    if (log_imm >= 0) {
      return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
    } else {
      // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
      // to a - possibly negated - assignment.
      if (value == 0) {
        switch (op) {
          case kOpOr:
          case kOpXor:
            // Or/Xor by zero reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // And by zero reduces to a `mov rdest, xzr'.
            DCHECK(op == kOpAnd);
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      } else if (value == INT64_C(-1)
                 || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
        switch (op) {
          case kOpAnd:
            // And by -1 reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          case kOpXor:
            // Xor by -1 reduces to an `mvn rdest, rsrc'.
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // Or by -1 reduces to a `mvn rdest, xzr'.
            DCHECK(op == kOpOr);
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      }
    }
  }

  RegStorage r_scratch;
  if (is_wide) {
    r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, value);
  } else {
    r_scratch = AllocTemp();
    LoadConstant(r_scratch, value);
  }
  if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
    res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
  else
    res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
  FreeTemp(r_scratch);
  return res;
}
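
// Informal examples for OpRegRegImm64() above (register names are arbitrary):
// OpRegRegImm(kOpAdd, w0, w1, -4) negates the immediate and emits
// "sub w0, w1, #4"; OpRegRegImm(kOpAnd, w0, w1, 0xff) uses the logical
// immediate form "and w0, w1, #0xff"; an AND with a value that is neither 0,
// -1 nor a valid logical immediate is first materialized in a scratch
// register and then uses the register form (alt_opcode).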

LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  A64Opcode opcode = kA64Brk1d;
  A64Opcode neg_opcode = kA64Brk1d;
  bool shift;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;

  if (LIKELY(abs_value < 0x1000)) {
    // abs_value is a 12-bit immediate.
    shift = false;
  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    // abs_value is a shifted 12-bit immediate.
    shift = true;
    abs_value >>= 12;
  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
    // Note: It is better to use two ADD/SUB instructions than to load the value into a temp register.
    // This works for both normal registers and SP.
    // For a frame size == 0x2468, it will be encoded as:
    //   sub sp, #0x2000
    //   sub sp, #0x468
    if (neg) {
      op = (op == kOpAdd) ? kOpSub : kOpAdd;
    }
    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
  } else {
    RegStorage r_tmp;
    LIR* res;
    if (IS_WIDE(wide)) {
      r_tmp = AllocTempWide();
      res = LoadConstantWide(r_tmp, value);
    } else {
      r_tmp = AllocTemp();
      res = LoadConstant(r_tmp, value);
    }
    OpRegReg(op, r_dest_src1, r_tmp);
    FreeTemp(r_tmp);
    return res;
  }

  switch (op) {
    case kOpAdd:
      neg_opcode = kA64Sub4RRdT;
      opcode = kA64Add4RRdT;
      break;
    case kOpSub:
      neg_opcode = kA64Add4RRdT;
      opcode = kA64Sub4RRdT;
      break;
    case kOpCmp:
      neg_opcode = kA64Cmn3RdT;
      opcode = kA64Cmp3RdT;
      break;
    default:
      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
      break;
  }

  if (UNLIKELY(neg))
    opcode = neg_opcode;

  if (EncodingMap[opcode].flags & IS_QUAD_OP)
    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
                   (shift) ? 1 : 0);
  else
    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}

int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
  DCHECK_EQ(shift_type & 0x3, shift_type);
  DCHECK_EQ(amount & 0x3f, amount);
  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}

int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
  DCHECK_EQ(extend_type & 0x7, extend_type);
  DCHECK_EQ(amount & 0x7, amount);
  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
}

bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
  return ((1 << 6) & encoded_value) != 0;
}
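
// Note on the packed operand format produced above: bit 6 distinguishes the
// two forms (0 for a shift, 1 for an extend). For shifts, the shift type is
// stored in bits [8:7] and the amount in bits [5:0]; for extends, the extend
// type is stored in bits [5:3] and the amount in bits [2:0]. IsExtendEncoding()
// simply tests bit 6.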

LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                   int scale, OpSize size) {
  LIR* load;
  int expected_scale = 0;
  A64Opcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset load (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_dest.IsFloat()) {
    if (r_dest.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = WIDE(kA64Ldr4fXxG);
    } else {
      DCHECK(r_dest.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Ldr4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:
    case kWord:
    case k64:
      r_dest = Check64BitReg(r_dest);
      opcode = WIDE(kA64Ldr4rXxG);
      expected_scale = 3;
      break;
    case kReference:
      r_dest = As32BitReg(r_dest);
      FALLTHROUGH_INTENDED;
    case kSingle:     // Intentional fall-through.
    case k32:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldr4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrh4wXxd;
      expected_scale = 1;
      break;
    case kSignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsh4rXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrb3wXx;
      break;
    case kSignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsb3rXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

    // This is a tertiary op (e.g. ldrb, ldrsb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    DCHECK(scale == 0 || scale == expected_scale);
    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  return load;
}

LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                    int scale, OpSize size) {
  LIR* store;
  int expected_scale = 0;
  A64Opcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset store (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_src.IsFloat()) {
    if (r_src.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = WIDE(kA64Str4fXxG);
    } else {
      DCHECK(r_src.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Str4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      opcode = WIDE(kA64Str4rXxG);
      expected_scale = 3;
      break;
    case kReference:
      r_src = As32BitReg(r_src);
      FALLTHROUGH_INTENDED;
    case kSingle:     // Intentional fall-through.
    case k32:
      r_src = Check32BitReg(r_src);
      opcode = kA64Str4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strh4wXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
    case kSignedByte:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strb3wXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. strb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                    (scale != 0) ? 1 : 0);
  }

  return store;
}

/*
 * Load value from base + displacement.  Optionally perform null check
 * on base (which must have an associated s_reg and MIR).  If not
 * performing null check, incoming MIR can be null.
 */
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                    OpSize size) {
  LIR* load = nullptr;
  A64Opcode opcode = kA64Brk1d;
  A64Opcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_dest = Check64BitReg(r_dest);
      scale = 3;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsDouble());
        opcode = WIDE(kA64Ldr3fXD);
        alt_opcode = WIDE(kA64Ldur3fXd);
      } else {
        opcode = WIDE(kA64Ldr3rXD);
        alt_opcode = WIDE(kA64Ldur3rXd);
      }
      break;
    case kReference:
      r_dest = As32BitReg(r_dest);
      FALLTHROUGH_INTENDED;
    case kSingle:     // Intentional fall-through.
    case k32:
      r_dest = Check32BitReg(r_dest);
      scale = 2;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsSingle());
        opcode = kA64Ldr3fXD;
      } else {
        opcode = kA64Ldr3rXD;
      }
      break;
    case kUnsignedHalf:
      scale = 1;
      opcode = kA64Ldrh3wXF;
      break;
    case kSignedHalf:
      scale = 1;
      opcode = kA64Ldrsh3rXF;
      break;
    case kUnsignedByte:
      opcode = kA64Ldrb3wXd;
      break;
    case kSignedByte:
      opcode = kA64Ldrsb3rXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled load.
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled load.
    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    load = LoadBaseIndexed(r_base, r_scratch,
                           (size == kReference) ? As64BitReg(r_dest) : r_dest,
                           0, size);
    FreeTemp(r_scratch);
  }

  // TODO: in future may need to differentiate Dalvik accesses w/ spills
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK_EQ(r_base, rs_sp);
    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
  }
  return load;
}
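
// Informal examples of the displacement handling above, for a k64 load with a
// hypothetical base register xB: displacement 0x100 is aligned and in range,
// so the scaled form is used (ldr x0, [xB, #0x100]); displacement -8 is
// aligned but negative, so the unscaled form is used (ldur x0, [xB, #-8]);
// displacement 0x12345 fits neither form, so it is materialized in a temp
// register and LoadBaseIndexed() is used instead.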

LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, VolatileKind is_volatile) {
  // LoadBaseDisp() will emit correct insn for atomic load on arm64
  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().

  LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // TODO: This should generate an acquire load instead of the barrier.
    GenMemBarrier(kLoadAny);
  }

  return load;
}

LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                     OpSize size) {
  LIR* store = nullptr;
  A64Opcode opcode = kA64Brk1d;
  A64Opcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      scale = 3;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsDouble());
        opcode = WIDE(kA64Str3fXD);
        alt_opcode = WIDE(kA64Stur3fXd);
      } else {
        opcode = WIDE(kA64Str3rXD);
        alt_opcode = WIDE(kA64Stur3rXd);
      }
      break;
    case kReference:
      r_src = As32BitReg(r_src);
      FALLTHROUGH_INTENDED;
    case kSingle:     // Intentional fall-through.
    case k32:
      r_src = Check32BitReg(r_src);
      scale = 2;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsSingle());
        opcode = kA64Str3fXD;
      } else {
        opcode = kA64Str3rXD;
      }
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      scale = 1;
      opcode = kA64Strh3wXF;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kA64Strb3wXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled store.
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled store.
    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    store = StoreBaseIndexed(r_base, r_scratch,
                             (size == kReference) ? As64BitReg(r_src) : r_src,
                             0, size);
    FreeTemp(r_scratch);
  }

  // TODO: In future, may need to differentiate Dalvik & spill accesses.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK_EQ(r_base, rs_sp);
    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
  }
  return store;
}

LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size, VolatileKind is_volatile) {
  // TODO: This should generate a release store and no barriers.
  if (UNLIKELY(is_volatile == kVolatile)) {
    // Ensure that prior accesses become visible to other threads first.
    GenMemBarrier(kAnyStore);
  }

  // StoreBaseDisp() will emit correct insn for atomic store on arm64
  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().

  LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // Preserve order with respect to any subsequent volatile loads.
    // We need StoreLoad, but that generally requires the most expensive barrier.
    GenMemBarrier(kAnyAny);
  }

  return store;
}

LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
  UNUSED(r_dest, r_src);
  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
  UNUSED(op, r_base, disp);
  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt,
                                    QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
  // The address of the trampoline is already loaded into r_tgt.
  return OpReg(op, r_tgt);
}

}  // namespace art