1// Copyright 2017, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
39ExactAssemblyScopeWithoutPoolsCheck::ExactAssemblyScopeWithoutPoolsCheck(
40    MacroAssembler* masm, size_t size, SizePolicy size_policy)
41    : ExactAssemblyScope(masm,
42                         size,
43                         size_policy,
44                         ExactAssemblyScope::kIgnorePools) {}
45
46void UseScratchRegisterScope::Open(MacroAssembler* masm) {
47  VIXL_ASSERT(masm_ == NULL);
48  VIXL_ASSERT(masm != NULL);
49  masm_ = masm;
50
51  old_available_ = masm_->GetScratchRegisterList()->GetList();
52  old_available_vfp_ = masm_->GetScratchVRegisterList()->GetList();
53
54  parent_ = masm->GetCurrentScratchRegisterScope();
55  masm->SetCurrentScratchRegisterScope(this);
56}
57
58
59void UseScratchRegisterScope::Close() {
60  if (masm_ != NULL) {
61    // Ensure that scopes nest perfectly, and do not outlive their parents.
62    // This is a run-time check because the order of destruction of objects in
63    // the _same_ scope is implementation-defined, and is likely to change in
64    // optimised builds.
65    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
66    masm_->SetCurrentScratchRegisterScope(parent_);
67
68    masm_->GetScratchRegisterList()->SetList(old_available_);
69    masm_->GetScratchVRegisterList()->SetList(old_available_vfp_);
70
71    masm_ = NULL;
72  }
73}
74
75
76bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
77  VIXL_ASSERT(masm_ != NULL);
78  VIXL_ASSERT(reg.IsValid());
79  return masm_->GetScratchRegisterList()->Includes(reg);
80}
81
82
83bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
84  VIXL_ASSERT(masm_ != NULL);
85  VIXL_ASSERT(reg.IsValid());
86  return masm_->GetScratchVRegisterList()->IncludesAllOf(reg);
87}
88
89
90Register UseScratchRegisterScope::Acquire() {
91  VIXL_ASSERT(masm_ != NULL);
92  Register reg = masm_->GetScratchRegisterList()->GetFirstAvailableRegister();
93  VIXL_CHECK(reg.IsValid());
94  masm_->GetScratchRegisterList()->Remove(reg);
95  return reg;
96}
97
98
99VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
100  switch (size_in_bits) {
101    case kSRegSizeInBits:
102      return AcquireS();
103    case kDRegSizeInBits:
104      return AcquireD();
105    case kQRegSizeInBits:
106      return AcquireQ();
107    default:
108      VIXL_UNREACHABLE();
109      return NoVReg;
110  }
111}
112
113
114QRegister UseScratchRegisterScope::AcquireQ() {
115  VIXL_ASSERT(masm_ != NULL);
116  QRegister reg =
117      masm_->GetScratchVRegisterList()->GetFirstAvailableQRegister();
118  VIXL_CHECK(reg.IsValid());
119  masm_->GetScratchVRegisterList()->Remove(reg);
120  return reg;
121}
122
123
124DRegister UseScratchRegisterScope::AcquireD() {
125  VIXL_ASSERT(masm_ != NULL);
126  DRegister reg =
127      masm_->GetScratchVRegisterList()->GetFirstAvailableDRegister();
128  VIXL_CHECK(reg.IsValid());
129  masm_->GetScratchVRegisterList()->Remove(reg);
130  return reg;
131}
132
133
134SRegister UseScratchRegisterScope::AcquireS() {
135  VIXL_ASSERT(masm_ != NULL);
136  SRegister reg =
137      masm_->GetScratchVRegisterList()->GetFirstAvailableSRegister();
138  VIXL_CHECK(reg.IsValid());
139  masm_->GetScratchVRegisterList()->Remove(reg);
140  return reg;
141}
142
143
144void UseScratchRegisterScope::Release(const Register& reg) {
145  VIXL_ASSERT(masm_ != NULL);
146  VIXL_ASSERT(reg.IsValid());
147  VIXL_ASSERT(!masm_->GetScratchRegisterList()->Includes(reg));
148  masm_->GetScratchRegisterList()->Combine(reg);
149}
150
151
152void UseScratchRegisterScope::Release(const VRegister& reg) {
153  VIXL_ASSERT(masm_ != NULL);
154  VIXL_ASSERT(reg.IsValid());
155  VIXL_ASSERT(!masm_->GetScratchVRegisterList()->IncludesAliasOf(reg));
156  masm_->GetScratchVRegisterList()->Combine(reg);
157}
158
159
160void UseScratchRegisterScope::Include(const RegisterList& list) {
161  VIXL_ASSERT(masm_ != NULL);
162  RegisterList excluded_registers(sp, lr, pc);
163  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
164  RegisterList* available = masm_->GetScratchRegisterList();
165  available->SetList(available->GetList() | mask);
166}
167
168
169void UseScratchRegisterScope::Include(const VRegisterList& list) {
170  VIXL_ASSERT(masm_ != NULL);
171  VRegisterList* available = masm_->GetScratchVRegisterList();
172  available->SetList(available->GetList() | list.GetList());
173}
174
175
176void UseScratchRegisterScope::Exclude(const RegisterList& list) {
177  VIXL_ASSERT(masm_ != NULL);
178  RegisterList* available = masm_->GetScratchRegisterList();
179  available->SetList(available->GetList() & ~list.GetList());
180}
181
182
183void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
184  VIXL_ASSERT(masm_ != NULL);
185  VRegisterList* available = masm_->GetScratchVRegisterList();
186  available->SetList(available->GetList() & ~list.GetList());
187}
188
189
190void UseScratchRegisterScope::Exclude(const Operand& operand) {
191  if (operand.IsImmediateShiftedRegister()) {
192    Exclude(operand.GetBaseRegister());
193  } else if (operand.IsRegisterShiftedRegister()) {
194    Exclude(operand.GetBaseRegister(), operand.GetShiftRegister());
195  } else {
196    VIXL_ASSERT(operand.IsImmediate());
197  }
198}
199
200
201void UseScratchRegisterScope::ExcludeAll() {
202  VIXL_ASSERT(masm_ != NULL);
203  masm_->GetScratchRegisterList()->SetList(0);
204  masm_->GetScratchVRegisterList()->SetList(0);
205}
206
207
208void MacroAssembler::EnsureEmitPoolsFor(size_t size_arg) {
209  // We skip the check when the pools are blocked.
210  if (ArePoolsBlocked()) return;
211
212  VIXL_ASSERT(IsUint32(size_arg));
213  uint32_t size = static_cast<uint32_t>(size_arg);
214
215  if (pool_manager_.MustEmit(GetCursorOffset(), size)) {
216    int32_t new_pc = pool_manager_.Emit(this, GetCursorOffset(), size);
217    VIXL_ASSERT(new_pc == GetCursorOffset());
218    USE(new_pc);
219  }
220}
221
222
223void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
224                                                Register tmp,
225                                                uint32_t imm) {
226  if (IsUintN(16, imm)) {
227    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
228    mov(cond, tmp, imm & 0xffff);
229    return;
230  }
231  if (IsUsingT32()) {
232    if (ImmediateT32::IsImmediateT32(~imm)) {
233      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
234      mvn(cond, tmp, ~imm);
235      return;
236    }
237  } else {
238    if (ImmediateA32::IsImmediateA32(~imm)) {
239      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
240      mvn(cond, tmp, ~imm);
241      return;
242    }
243  }
244  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
245  mov(cond, tmp, imm & 0xffff);
246  movt(cond, tmp, imm >> 16);
247}
248
249
250MemOperand MacroAssembler::MemOperandComputationHelper(
251    Condition cond,
252    Register scratch,
253    Register base,
254    uint32_t offset,
255    uint32_t extra_offset_mask) {
256  VIXL_ASSERT(!AliasesAvailableScratchRegister(scratch));
257  VIXL_ASSERT(!AliasesAvailableScratchRegister(base));
258  VIXL_ASSERT(allow_macro_instructions_);
259  VIXL_ASSERT(OutsideITBlock());
260
261  // Check for the simple pass-through case.
262  if ((offset & extra_offset_mask) == offset) return MemOperand(base, offset);
263
264  MacroEmissionCheckScope guard(this);
265  ITScope it_scope(this, &cond, guard);
266
267  uint32_t load_store_offset = offset & extra_offset_mask;
268  uint32_t add_offset = offset & ~extra_offset_mask;
269  if ((add_offset != 0) &&
270      (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
271    load_store_offset = 0;
272    add_offset = offset;
273  }
274
275  if (base.IsPC()) {
276    // Special handling for PC bases. We must read the PC in the first
277    // instruction (and only in that instruction), and we must also take care to
278    // keep the same address calculation as loads and stores. For T32, that
279    // means using something like ADR, which uses AlignDown(PC, 4).
280
281    // We don't handle positive offsets from PC because the intention is not
282    // clear; does the user expect the offset from the current
283    // GetCursorOffset(), or to allow a certain amount of space after the
284    // instruction?
285    VIXL_ASSERT((offset & 0x80000000) != 0);
286    if (IsUsingT32()) {
287      // T32: make the first instruction "SUB (immediate, from PC)" -- an alias
288      // of ADR -- to get behaviour like loads and stores. This ADR can handle
289      // at least as much offset as the load_store_offset so it can replace it.
290
291      uint32_t sub_pc_offset = (-offset) & 0xfff;
292      load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
293      add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;
294
295      ExactAssemblyScope scope(this, k32BitT32InstructionSizeInBytes);
296      sub(cond, scratch, base, sub_pc_offset);
297
298      if (add_offset == 0) return MemOperand(scratch, load_store_offset);
299
300      // The rest of the offset can be generated in the usual way.
301      base = scratch;
302    }
303    // A32 can use any SUB instruction, so we don't have to do anything special
304    // here except to ensure that we read the PC first.
305  }
306
307  add(cond, scratch, base, add_offset);
308  return MemOperand(scratch, load_store_offset);
309}
310
311
312uint32_t MacroAssembler::GetOffsetMask(InstructionType type,
313                                       AddrMode addrmode) {
314  switch (type) {
315    case kLdr:
316    case kLdrb:
317    case kStr:
318    case kStrb:
319      if (IsUsingA32() || (addrmode == Offset)) {
320        return 0xfff;
321      } else {
322        return 0xff;
323      }
324    case kLdrsb:
325    case kLdrh:
326    case kLdrsh:
327    case kStrh:
328      if (IsUsingT32() && (addrmode == Offset)) {
329        return 0xfff;
330      } else {
331        return 0xff;
332      }
333    case kVldr:
334    case kVstr:
335      return 0x3fc;
336    case kLdrd:
337    case kStrd:
338      if (IsUsingA32()) {
339        return 0xff;
340      } else {
341        return 0x3fc;
342      }
343    default:
344      VIXL_UNREACHABLE();
345      return 0;
346  }
347}
348
349
350HARDFLOAT void PrintfTrampolineRRRR(
351    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
352  printf(format, a, b, c, d);
353}
354
355
356HARDFLOAT void PrintfTrampolineRRRD(
357    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
358  printf(format, a, b, c, d);
359}
360
361
362HARDFLOAT void PrintfTrampolineRRDR(
363    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
364  printf(format, a, b, c, d);
365}
366
367
368HARDFLOAT void PrintfTrampolineRRDD(
369    const char* format, uint32_t a, uint32_t b, double c, double d) {
370  printf(format, a, b, c, d);
371}
372
373
374HARDFLOAT void PrintfTrampolineRDRR(
375    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
376  printf(format, a, b, c, d);
377}
378
379
380HARDFLOAT void PrintfTrampolineRDRD(
381    const char* format, uint32_t a, double b, uint32_t c, double d) {
382  printf(format, a, b, c, d);
383}
384
385
386HARDFLOAT void PrintfTrampolineRDDR(
387    const char* format, uint32_t a, double b, double c, uint32_t d) {
388  printf(format, a, b, c, d);
389}
390
391
392HARDFLOAT void PrintfTrampolineRDDD(
393    const char* format, uint32_t a, double b, double c, double d) {
394  printf(format, a, b, c, d);
395}
396
397
398HARDFLOAT void PrintfTrampolineDRRR(
399    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
400  printf(format, a, b, c, d);
401}
402
403
404HARDFLOAT void PrintfTrampolineDRRD(
405    const char* format, double a, uint32_t b, uint32_t c, double d) {
406  printf(format, a, b, c, d);
407}
408
409
410HARDFLOAT void PrintfTrampolineDRDR(
411    const char* format, double a, uint32_t b, double c, uint32_t d) {
412  printf(format, a, b, c, d);
413}
414
415
416HARDFLOAT void PrintfTrampolineDRDD(
417    const char* format, double a, uint32_t b, double c, double d) {
418  printf(format, a, b, c, d);
419}
420
421
422HARDFLOAT void PrintfTrampolineDDRR(
423    const char* format, double a, double b, uint32_t c, uint32_t d) {
424  printf(format, a, b, c, d);
425}
426
427
428HARDFLOAT void PrintfTrampolineDDRD(
429    const char* format, double a, double b, uint32_t c, double d) {
430  printf(format, a, b, c, d);
431}
432
433
434HARDFLOAT void PrintfTrampolineDDDR(
435    const char* format, double a, double b, double c, uint32_t d) {
436  printf(format, a, b, c, d);
437}
438
439
440HARDFLOAT void PrintfTrampolineDDDD(
441    const char* format, double a, double b, double c, double d) {
442  printf(format, a, b, c, d);
443}
444
445
446void MacroAssembler::Printf(const char* format,
447                            CPURegister reg1,
448                            CPURegister reg2,
449                            CPURegister reg3,
450                            CPURegister reg4) {
451  // Exclude all registers from the available scratch registers, so
452  // that we are able to use ip below.
453  // TODO: Refactor this function to use UseScratchRegisterScope
454  // for temporary registers below.
455  UseScratchRegisterScope scratch(this);
456  scratch.ExcludeAll();
457  if (generate_simulator_code_) {
458    PushRegister(reg4);
459    PushRegister(reg3);
460    PushRegister(reg2);
461    PushRegister(reg1);
462    Push(RegisterList(r0, r1));
463    StringLiteral* format_literal =
464        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
465    Adr(r0, format_literal);
466    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
467                    (reg2.GetType() << 4) | reg1.GetType();
468    Mov(r1, args);
469    Hvc(kPrintfCode);
470    Pop(RegisterList(r0, r1));
471    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
472               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
473    Drop(size);
474  } else {
475    // Generate on a native platform => 32 bit environment.
476    // Preserve core registers r0-r3, r12, r14
477    const uint32_t saved_registers_mask =
478        kCallerSavedRegistersMask | (1 << r5.GetCode());
479    Push(RegisterList(saved_registers_mask));
480    // Push VFP registers.
481    Vpush(Untyped64, DRegisterList(d0, 8));
482    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
483    // Search one register which has been saved and which doesn't need to be
484    // printed.
485    RegisterList available_registers(kCallerSavedRegistersMask);
486    if (reg1.GetType() == CPURegister::kRRegister) {
487      available_registers.Remove(Register(reg1.GetCode()));
488    }
489    if (reg2.GetType() == CPURegister::kRRegister) {
490      available_registers.Remove(Register(reg2.GetCode()));
491    }
492    if (reg3.GetType() == CPURegister::kRRegister) {
493      available_registers.Remove(Register(reg3.GetCode()));
494    }
495    if (reg4.GetType() == CPURegister::kRRegister) {
496      available_registers.Remove(Register(reg4.GetCode()));
497    }
498    Register tmp = available_registers.GetFirstAvailableRegister();
499    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
500    // Push the flags.
501    Mrs(tmp, APSR);
502    Push(tmp);
503    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
504    Push(tmp);
505    // Push the registers to print on the stack.
506    PushRegister(reg4);
507    PushRegister(reg3);
508    PushRegister(reg2);
509    PushRegister(reg1);
510    int core_count = 1;
511    int vfp_count = 0;
512    uint32_t printf_type = 0;
513    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
514    // Reg4 may stay into the stack if all the register to print are core
515    // registers.
516    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
517    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
518    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
519    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
520    // Ensure that the stack is aligned on 8 bytes.
521    And(r5, sp, 0x7);
522    if (core_count == 5) {
523      // One 32 bit argument (reg4) has been left on the stack =>  align the
524      // stack
525      // before the argument.
526      Pop(r0);
527      Sub(sp, sp, r5);
528      Push(r0);
529    } else {
530      Sub(sp, sp, r5);
531    }
532    // Select the right trampoline depending on the arguments.
533    uintptr_t address;
534    switch (printf_type) {
535      case 0:
536        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
537        break;
538      case 1:
539        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
540        break;
541      case 2:
542        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
543        break;
544      case 3:
545        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
546        break;
547      case 4:
548        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
549        break;
550      case 5:
551        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
552        break;
553      case 6:
554        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
555        break;
556      case 7:
557        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
558        break;
559      case 8:
560        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
561        break;
562      case 9:
563        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
564        break;
565      case 10:
566        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
567        break;
568      case 11:
569        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
570        break;
571      case 12:
572        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
573        break;
574      case 13:
575        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
576        break;
577      case 14:
578        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
579        break;
580      case 15:
581        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
582        break;
583      default:
584        VIXL_UNREACHABLE();
585        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
586        break;
587    }
588    StringLiteral* format_literal =
589        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
590    Adr(r0, format_literal);
591    Mov(ip, Operand::From(address));
592    Blx(ip);
593    // If register reg4 was left on the stack => skip it.
594    if (core_count == 5) Drop(kRegSizeInBytes);
595    // Restore the stack as it was before alignment.
596    Add(sp, sp, r5);
597    // Restore the flags.
598    Pop(tmp);
599    Vmsr(FPSCR, tmp);
600    Pop(tmp);
601    Msr(APSR_nzcvqg, tmp);
602    // Restore the regsisters.
603    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
604    Vpop(Untyped64, DRegisterList(d0, 8));
605    Pop(RegisterList(saved_registers_mask));
606  }
607}
608
609
610void MacroAssembler::PushRegister(CPURegister reg) {
611  switch (reg.GetType()) {
612    case CPURegister::kNoRegister:
613      break;
614    case CPURegister::kRRegister:
615      Push(Register(reg.GetCode()));
616      break;
617    case CPURegister::kSRegister:
618      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
619      break;
620    case CPURegister::kDRegister:
621      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
622      break;
623    case CPURegister::kQRegister:
624      VIXL_UNIMPLEMENTED();
625      break;
626  }
627}
628
629
630void MacroAssembler::PreparePrintfArgument(CPURegister reg,
631                                           int* core_count,
632                                           int* vfp_count,
633                                           uint32_t* printf_type) {
634  switch (reg.GetType()) {
635    case CPURegister::kNoRegister:
636      break;
637    case CPURegister::kRRegister:
638      VIXL_ASSERT(*core_count <= 4);
639      if (*core_count < 4) Pop(Register(*core_count));
640      *core_count += 1;
641      break;
642    case CPURegister::kSRegister:
643      VIXL_ASSERT(*vfp_count < 4);
644      *printf_type |= 1 << (*core_count + *vfp_count - 1);
645      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
646      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
647      *vfp_count += 1;
648      break;
649    case CPURegister::kDRegister:
650      VIXL_ASSERT(*vfp_count < 4);
651      *printf_type |= 1 << (*core_count + *vfp_count - 1);
652      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
653      *vfp_count += 1;
654      break;
655    case CPURegister::kQRegister:
656      VIXL_UNIMPLEMENTED();
657      break;
658  }
659}
660
661
662void MacroAssembler::Delegate(InstructionType type,
663                              InstructionCondROp instruction,
664                              Condition cond,
665                              Register rn,
666                              const Operand& operand) {
667  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
668              (type == kUxtb16));
669
670  if (type == kMovt) {
671    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.\n");
672  }
673
674  // This delegate only supports teq with immediates.
675  CONTEXT_SCOPE;
676  if ((type == kTeq) && operand.IsImmediate()) {
677    UseScratchRegisterScope temps(this);
678    Register scratch = temps.Acquire();
679    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
680    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
681    teq(cond, rn, scratch);
682    return;
683  }
684  Assembler::Delegate(type, instruction, cond, rn, operand);
685}
686
687
688void MacroAssembler::Delegate(InstructionType type,
689                              InstructionCondSizeROp instruction,
690                              Condition cond,
691                              EncodingSize size,
692                              Register rn,
693                              const Operand& operand) {
694  CONTEXT_SCOPE;
695  VIXL_ASSERT(size.IsBest());
696  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
697              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
698              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
699              (type == kUxtb) || (type == kUxth));
700  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
701    VIXL_ASSERT((type != kMov) || (type != kMovs));
702    InstructionCondRROp shiftop = NULL;
703    switch (operand.GetShift().GetType()) {
704      case LSL:
705        shiftop = &Assembler::lsl;
706        break;
707      case LSR:
708        shiftop = &Assembler::lsr;
709        break;
710      case ASR:
711        shiftop = &Assembler::asr;
712        break;
713      case RRX:
714        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
715        VIXL_UNREACHABLE();
716        break;
717      case ROR:
718        shiftop = &Assembler::ror;
719        break;
720      default:
721        VIXL_UNREACHABLE();
722    }
723    if (shiftop != NULL) {
724      UseScratchRegisterScope temps(this);
725      Register scratch = temps.Acquire();
726      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
727      (this->*shiftop)(cond,
728                       scratch,
729                       operand.GetBaseRegister(),
730                       operand.GetShiftRegister());
731      (this->*instruction)(cond, size, rn, scratch);
732      return;
733    }
734  }
735  if (operand.IsImmediate()) {
736    uint32_t imm = operand.GetImmediate();
737    switch (type) {
738      case kMov:
739      case kMovs:
740        if (!rn.IsPC()) {
741          // Immediate is too large, but not using PC, so handle with mov{t}.
742          HandleOutOfBoundsImmediate(cond, rn, imm);
743          if (type == kMovs) {
744            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
745            tst(cond, rn, rn);
746          }
747          return;
748        } else if (type == kMov) {
749          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
750          // Immediate is too large and using PC, so handle using a temporary
751          // register.
752          UseScratchRegisterScope temps(this);
753          Register scratch = temps.Acquire();
754          HandleOutOfBoundsImmediate(al, scratch, imm);
755          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
756          bx(cond, scratch);
757          return;
758        }
759        break;
760      case kCmn:
761      case kCmp:
762        if (IsUsingA32() || !rn.IsPC()) {
763          UseScratchRegisterScope temps(this);
764          Register scratch = temps.Acquire();
765          HandleOutOfBoundsImmediate(cond, scratch, imm);
766          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
767          (this->*instruction)(cond, size, rn, scratch);
768          return;
769        }
770        break;
771      case kMvn:
772      case kMvns:
773        if (!rn.IsPC()) {
774          UseScratchRegisterScope temps(this);
775          Register scratch = temps.Acquire();
776          HandleOutOfBoundsImmediate(cond, scratch, imm);
777          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
778          (this->*instruction)(cond, size, rn, scratch);
779          return;
780        }
781        break;
782      case kTst:
783        if (IsUsingA32() || !rn.IsPC()) {
784          UseScratchRegisterScope temps(this);
785          Register scratch = temps.Acquire();
786          HandleOutOfBoundsImmediate(cond, scratch, imm);
787          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
788          (this->*instruction)(cond, size, rn, scratch);
789          return;
790        }
791        break;
792      default:  // kSxtb, Sxth, Uxtb, Uxth
793        break;
794    }
795  }
796  Assembler::Delegate(type, instruction, cond, size, rn, operand);
797}
798
799
800void MacroAssembler::Delegate(InstructionType type,
801                              InstructionCondRROp instruction,
802                              Condition cond,
803                              Register rd,
804                              Register rn,
805                              const Operand& operand) {
806  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
807      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
808      (type == kPkhbt) || (type == kPkhtb)) {
809    UnimplementedDelegate(type);
810    return;
811  }
812
813  // This delegate only handles the following instructions.
814  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
815              (type == kRscs));
816  CONTEXT_SCOPE;
817
818  // T32 does not support register shifted register operands, emulate it.
819  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
820    InstructionCondRROp shiftop = NULL;
821    switch (operand.GetShift().GetType()) {
822      case LSL:
823        shiftop = &Assembler::lsl;
824        break;
825      case LSR:
826        shiftop = &Assembler::lsr;
827        break;
828      case ASR:
829        shiftop = &Assembler::asr;
830        break;
831      case RRX:
832        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
833        VIXL_UNREACHABLE();
834        break;
835      case ROR:
836        shiftop = &Assembler::ror;
837        break;
838      default:
839        VIXL_UNREACHABLE();
840    }
841    if (shiftop != NULL) {
842      UseScratchRegisterScope temps(this);
843      Register rm = operand.GetBaseRegister();
844      Register rs = operand.GetShiftRegister();
845      // Try to use rd as a scratch register. We can do this if it aliases rs or
846      // rm (because we read them in the first instruction), but not rn.
847      if (!rd.Is(rn)) temps.Include(rd);
848      Register scratch = temps.Acquire();
849      // TODO: The scope length was measured empirically. We should analyse the
850      // worst-case size and add targetted tests.
851      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
852      (this->*shiftop)(cond, scratch, rm, rs);
853      (this->*instruction)(cond, rd, rn, scratch);
854      return;
855    }
856  }
857
858  // T32 does not have a Rsc instruction, negate the lhs input and turn it into
859  // an Adc. Adc and Rsc are equivalent using a bitwise NOT:
860  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
861  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
862    // The RegisterShiftRegister case should have been handled above.
863    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
864    UseScratchRegisterScope temps(this);
865    // Try to use rd as a scratch register. We can do this if it aliases rn
866    // (because we read it in the first instruction), but not rm.
867    temps.Include(rd);
868    temps.Exclude(operand);
869    Register negated_rn = temps.Acquire();
870    {
871      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
872      mvn(cond, negated_rn, rn);
873    }
874    if (type == kRsc) {
875      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
876      adc(cond, rd, negated_rn, operand);
877      return;
878    }
879    // TODO: We shouldn't have to specify how much space the next instruction
880    // needs.
881    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
882    adcs(cond, rd, negated_rn, operand);
883    return;
884  }
885
886  if (operand.IsImmediate()) {
887    // If the immediate can be encoded when inverted, turn Orn into Orr.
888    // Otherwise rely on HandleOutOfBoundsImmediate to generate a series of
889    // mov.
890    int32_t imm = operand.GetSignedImmediate();
891    if (((type == kOrn) || (type == kOrns)) && IsModifiedImmediate(~imm)) {
892      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
893      switch (type) {
894        case kOrn:
895          orr(cond, rd, rn, ~imm);
896          return;
897        case kOrns:
898          orrs(cond, rd, rn, ~imm);
899          return;
900        default:
901          VIXL_UNREACHABLE();
902          break;
903      }
904    }
905  }
906
907  // A32 does not have a Orn instruction, negate the rhs input and turn it into
908  // a Orr.
909  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
910    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
911    //  mvn r0, r2
912    //  orr r0, r1, r0
913    Register scratch;
914    UseScratchRegisterScope temps(this);
915    // Try to use rd as a scratch register. We can do this if it aliases rs or
916    // rm (because we read them in the first instruction), but not rn.
917    if (!rd.Is(rn)) temps.Include(rd);
918    scratch = temps.Acquire();
919    {
920      // TODO: We shouldn't have to specify how much space the next instruction
921      // needs.
922      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
923      mvn(cond, scratch, operand);
924    }
925    if (type == kOrns) {
926      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
927      orrs(cond, rd, rn, scratch);
928      return;
929    }
930    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
931    orr(cond, rd, rn, scratch);
932    return;
933  }
934
935  if (operand.IsImmediate()) {
936    UseScratchRegisterScope temps(this);
937    // Allow using the destination as a scratch register if possible.
938    if (!rd.Is(rn)) temps.Include(rd);
939    Register scratch = temps.Acquire();
940    int32_t imm = operand.GetSignedImmediate();
941    HandleOutOfBoundsImmediate(cond, scratch, imm);
942    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
943    (this->*instruction)(cond, rd, rn, scratch);
944    return;
945  }
946  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
947}
948
949
950void MacroAssembler::Delegate(InstructionType type,
951                              InstructionCondSizeRL instruction,
952                              Condition cond,
953                              EncodingSize size,
954                              Register rd,
955                              Location* location) {
956  VIXL_ASSERT((type == kLdr) || (type == kAdr));
957
958  CONTEXT_SCOPE;
959  VIXL_ASSERT(size.IsBest());
960
961  if ((type == kLdr) && location->IsBound()) {
962    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
963    UseScratchRegisterScope temps(this);
964    temps.Include(rd);
965    uint32_t mask = GetOffsetMask(type, Offset);
966    ldr(rd, MemOperandComputationHelper(cond, temps.Acquire(), location, mask));
967    return;
968  }
969
970  Assembler::Delegate(type, instruction, cond, size, rd, location);
971}
972
973
974bool MacroAssembler::GenerateSplitInstruction(
975    InstructionCondSizeRROp instruction,
976    Condition cond,
977    Register rd,
978    Register rn,
979    uint32_t imm,
980    uint32_t mask) {
981  uint32_t high = imm & ~mask;
982  if (!IsModifiedImmediate(high) && !rn.IsPC()) return false;
983  // If high is a modified immediate, we can perform the operation with
984  // only 2 instructions.
985  // Else, if rn is PC, we want to avoid moving PC into a temporary.
986  // Therefore, we also use the pattern even if the second call may
987  // generate 3 instructions.
988  uint32_t low = imm & mask;
989  CodeBufferCheckScope scope(this,
990                             (rn.IsPC() ? 4 : 2) * kMaxInstructionSizeInBytes);
991  (this->*instruction)(cond, Best, rd, rn, low);
992  (this->*instruction)(cond, Best, rd, rd, high);
993  return true;
994}
995
996
997void MacroAssembler::Delegate(InstructionType type,
998                              InstructionCondSizeRROp instruction,
999                              Condition cond,
1000                              EncodingSize size,
1001                              Register rd,
1002                              Register rn,
1003                              const Operand& operand) {
1004  VIXL_ASSERT(
1005      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1006      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1007      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1008      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1009      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1010      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1011      (type == kSub) || (type == kSubs));
1012
1013  CONTEXT_SCOPE;
1014  VIXL_ASSERT(size.IsBest());
1015  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1016    InstructionCondRROp shiftop = NULL;
1017    switch (operand.GetShift().GetType()) {
1018      case LSL:
1019        shiftop = &Assembler::lsl;
1020        break;
1021      case LSR:
1022        shiftop = &Assembler::lsr;
1023        break;
1024      case ASR:
1025        shiftop = &Assembler::asr;
1026        break;
1027      case RRX:
1028        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1029        VIXL_UNREACHABLE();
1030        break;
1031      case ROR:
1032        shiftop = &Assembler::ror;
1033        break;
1034      default:
1035        VIXL_UNREACHABLE();
1036    }
1037    if (shiftop != NULL) {
1038      UseScratchRegisterScope temps(this);
1039      Register rm = operand.GetBaseRegister();
1040      Register rs = operand.GetShiftRegister();
1041      // Try to use rd as a scratch register. We can do this if it aliases rs or
1042      // rm (because we read them in the first instruction), but not rn.
1043      if (!rd.Is(rn)) temps.Include(rd);
1044      Register scratch = temps.Acquire();
1045      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1046      (this->*shiftop)(cond, scratch, rm, rs);
1047      (this->*instruction)(cond, size, rd, rn, scratch);
1048      return;
1049    }
1050  }
1051  if (operand.IsImmediate()) {
1052    int32_t imm = operand.GetSignedImmediate();
1053    if (ImmediateT32::IsImmediateT32(~imm)) {
1054      if (IsUsingT32()) {
1055        switch (type) {
1056          case kOrr:
1057            orn(cond, rd, rn, ~imm);
1058            return;
1059          case kOrrs:
1060            orns(cond, rd, rn, ~imm);
1061            return;
1062          default:
1063            break;
1064        }
1065      }
1066    }
1067    if (imm < 0) {
1068      InstructionCondSizeRROp asmcb = NULL;
1069      // Add and sub are equivalent using an arithmetic negation:
1070      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1071      // Add and sub with carry are equivalent using a bitwise NOT:
1072      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
1073      switch (type) {
1074        case kAdd:
1075          asmcb = &Assembler::sub;
1076          imm = -imm;
1077          break;
1078        case kAdds:
1079          asmcb = &Assembler::subs;
1080          imm = -imm;
1081          break;
1082        case kSub:
1083          asmcb = &Assembler::add;
1084          imm = -imm;
1085          break;
1086        case kSubs:
1087          asmcb = &Assembler::adds;
1088          imm = -imm;
1089          break;
1090        case kAdc:
1091          asmcb = &Assembler::sbc;
1092          imm = ~imm;
1093          break;
1094        case kAdcs:
1095          asmcb = &Assembler::sbcs;
1096          imm = ~imm;
1097          break;
1098        case kSbc:
1099          asmcb = &Assembler::adc;
1100          imm = ~imm;
1101          break;
1102        case kSbcs:
1103          asmcb = &Assembler::adcs;
1104          imm = ~imm;
1105          break;
1106        default:
1107          break;
1108      }
1109      if (asmcb != NULL) {
1110        CodeBufferCheckScope scope(this, 4 * kMaxInstructionSizeInBytes);
1111        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1112        return;
1113      }
1114    }
1115
1116    // When rn is PC, only handle negative offsets. The correct way to handle
1117    // positive offsets isn't clear; does the user want the offset from the
1118    // start of the macro, or from the end (to allow a certain amount of space)?
1119    // When type is Add or Sub, imm is always positive (imm < 0 has just been
1120    // handled and imm == 0 would have been generated without the need of a
1121    // delegate). Therefore, only add to PC is forbidden here.
1122    if ((((type == kAdd) && !rn.IsPC()) || (type == kSub)) &&
1123        (IsUsingA32() || (!rd.IsPC() && !rn.IsPC()))) {
1124      VIXL_ASSERT(imm > 0);
1125      // Try to break the constant into two modified immediates.
1126      // For T32 also try to break the constant into one imm12 and one modified
1127      // immediate. Count the trailing zeroes and get the biggest even value.
1128      int trailing_zeroes = CountTrailingZeros(imm) & ~1u;
1129      uint32_t mask = ((trailing_zeroes < 4) && IsUsingT32())
1130                          ? 0xfff
1131                          : (0xff << trailing_zeroes);
1132      if (GenerateSplitInstruction(instruction, cond, rd, rn, imm, mask)) {
1133        return;
1134      }
1135      InstructionCondSizeRROp asmcb = NULL;
1136      switch (type) {
1137        case kAdd:
1138          asmcb = &Assembler::sub;
1139          break;
1140        case kSub:
1141          asmcb = &Assembler::add;
1142          break;
1143        default:
1144          VIXL_UNREACHABLE();
1145      }
1146      if (GenerateSplitInstruction(asmcb, cond, rd, rn, -imm, mask)) {
1147        return;
1148      }
1149    }
1150
1151    UseScratchRegisterScope temps(this);
1152    // Allow using the destination as a scratch register if possible.
1153    if (!rd.Is(rn)) temps.Include(rd);
1154    if (rn.IsPC()) {
1155      // If we're reading the PC, we need to do it in the first instruction,
1156      // otherwise we'll read the wrong value. We rely on this to handle the
1157      // long-range PC-relative MemOperands which can result from user-managed
1158      // literals.
1159
1160      // Only handle negative offsets. The correct way to handle positive
1161      // offsets isn't clear; does the user want the offset from the start of
1162      // the macro, or from the end (to allow a certain amount of space)?
1163      bool offset_is_negative_or_zero = (imm <= 0);
1164      switch (type) {
1165        case kAdd:
1166        case kAdds:
1167          offset_is_negative_or_zero = (imm <= 0);
1168          break;
1169        case kSub:
1170        case kSubs:
1171          offset_is_negative_or_zero = (imm >= 0);
1172          break;
1173        case kAdc:
1174        case kAdcs:
1175          offset_is_negative_or_zero = (imm < 0);
1176          break;
1177        case kSbc:
1178        case kSbcs:
1179          offset_is_negative_or_zero = (imm > 0);
1180          break;
1181        default:
1182          break;
1183      }
1184      if (offset_is_negative_or_zero) {
1185        {
1186          rn = temps.Acquire();
1187          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1188          mov(cond, rn, pc);
1189        }
1190        // Recurse rather than falling through, to try to get the immediate into
1191        // a single instruction.
1192        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1193        (this->*instruction)(cond, size, rd, rn, operand);
1194        return;
1195      }
1196    } else {
1197      Register scratch = temps.Acquire();
1198      // TODO: The scope length was measured empirically. We should analyse the
1199      // worst-case size and add targetted tests.
1200      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1201      mov(cond, scratch, operand.GetImmediate());
1202      (this->*instruction)(cond, size, rd, rn, scratch);
1203      return;
1204    }
1205  }
1206  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1207}
1208
1209
1210void MacroAssembler::Delegate(InstructionType type,
1211                              InstructionRL instruction,
1212                              Register rn,
1213                              Location* location) {
1214  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1215
1216  CONTEXT_SCOPE;
1217  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1218  if (IsUsingA32()) {
1219    if (type == kCbz) {
1220      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1221    } else {
1222      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1223    }
1224  } else if (rn.IsLow()) {
1225    switch (type) {
1226      case kCbnz: {
1227        Label done;
1228        cbz(rn, &done);
1229        b(location);
1230        Bind(&done);
1231        return;
1232      }
1233      case kCbz: {
1234        Label done;
1235        cbnz(rn, &done);
1236        b(location);
1237        Bind(&done);
1238        return;
1239      }
1240      default:
1241        break;
1242    }
1243  }
1244  Assembler::Delegate(type, instruction, rn, location);
1245}
1246
1247
1248template <typename T>
1249static inline bool IsI64BitPattern(T imm) {
1250  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1251    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1252  }
1253  return true;
1254}
1255
1256
1257template <typename T>
1258static inline bool IsI8BitPattern(T imm) {
1259  uint8_t imm8 = imm & 0xff;
1260  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1261    imm >>= 8;
1262    if ((imm & 0xff) != imm8) return false;
1263  }
1264  return true;
1265}
1266
1267
1268static inline bool CanBeInverted(uint32_t imm32) {
1269  uint32_t fill8 = 0;
1270
1271  if ((imm32 & 0xffffff00) == 0xffffff00) {
1272    //    11111111 11111111 11111111 abcdefgh
1273    return true;
1274  }
1275  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1276    fill8 = imm32 & 0xff;
1277    imm32 >>= 8;
1278    if ((imm32 >> 8) == 0xffff) {
1279      //    11111111 11111111 abcdefgh 00000000
1280      // or 11111111 11111111 abcdefgh 11111111
1281      return true;
1282    }
1283    if ((imm32 & 0xff) == fill8) {
1284      imm32 >>= 8;
1285      if ((imm32 >> 8) == 0xff) {
1286        //    11111111 abcdefgh 00000000 00000000
1287        // or 11111111 abcdefgh 11111111 11111111
1288        return true;
1289      }
1290      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1291        //    abcdefgh 11111111 11111111 11111111
1292        return true;
1293      }
1294    }
1295  }
1296  return false;
1297}
1298
1299
1300template <typename RES, typename T>
1301static inline RES replicate(T imm) {
1302  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1303              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1304  RES res = imm;
1305  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1306    res = (res << (sizeof(T) * 8)) | imm;
1307  }
1308  return res;
1309}
1310
1311
1312void MacroAssembler::Delegate(InstructionType type,
1313                              InstructionCondDtSSop instruction,
1314                              Condition cond,
1315                              DataType dt,
1316                              SRegister rd,
1317                              const SOperand& operand) {
1318  CONTEXT_SCOPE;
1319  if (type == kVmov) {
1320    if (operand.IsImmediate() && dt.Is(F32)) {
1321      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1322      if (neon_imm.CanConvert<float>()) {
1323        // movw ip, imm16
1324        // movk ip, imm16
1325        // vmov s0, ip
1326        UseScratchRegisterScope temps(this);
1327        Register scratch = temps.Acquire();
1328        float f = neon_imm.GetImmediate<float>();
1329        // TODO: The scope length was measured empirically. We should analyse
1330        // the
1331        // worst-case size and add targetted tests.
1332        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1333        mov(cond, scratch, FloatToRawbits(f));
1334        vmov(cond, rd, scratch);
1335        return;
1336      }
1337    }
1338  }
1339  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1340}
1341
1342
1343void MacroAssembler::Delegate(InstructionType type,
1344                              InstructionCondDtDDop instruction,
1345                              Condition cond,
1346                              DataType dt,
1347                              DRegister rd,
1348                              const DOperand& operand) {
1349  CONTEXT_SCOPE;
1350  if (type == kVmov) {
1351    if (operand.IsImmediate()) {
1352      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1353      switch (dt.GetValue()) {
1354        case I32:
1355          if (neon_imm.CanConvert<uint32_t>()) {
1356            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1357            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1358            if (IsI8BitPattern(imm)) {
1359              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1360              vmov(cond, I8, rd, imm & 0xff);
1361              return;
1362            }
1363            // vmov.i32 d0, 0xff0000ff will translate into
1364            // vmov.i64 d0, 0xff0000ffff0000ff
1365            if (IsI64BitPattern(imm)) {
1366              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1367              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1368              return;
1369            }
1370            // vmov.i32 d0, 0xffab0000 will translate into
1371            // vmvn.i32 d0, 0x0054ffff
1372            if (cond.Is(al) && CanBeInverted(imm)) {
1373              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1374              vmvn(I32, rd, ~imm);
1375              return;
1376            }
1377          }
1378          break;
1379        case I16:
1380          if (neon_imm.CanConvert<uint16_t>()) {
1381            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1382            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1383            if (IsI8BitPattern(imm)) {
1384              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1385              vmov(cond, I8, rd, imm & 0xff);
1386              return;
1387            }
1388          }
1389          break;
1390        case I64:
1391          if (neon_imm.CanConvert<uint64_t>()) {
1392            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1393            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1394            if (IsI8BitPattern(imm)) {
1395              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1396              vmov(cond, I8, rd, imm & 0xff);
1397              return;
1398            }
1399            // mov ip, lo(imm64)
1400            // vdup d0, ip
1401            // vdup is prefered to 'vmov d0[0]' as d0[1] does not need to be
1402            // preserved
1403            {
1404              UseScratchRegisterScope temps(this);
1405              Register scratch = temps.Acquire();
1406              {
1407                // TODO: The scope length was measured empirically. We should
1408                // analyse the
1409                // worst-case size and add targetted tests.
1410                CodeBufferCheckScope scope(this,
1411                                           2 * kMaxInstructionSizeInBytes);
1412                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1413              }
1414              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1415              vdup(cond, Untyped32, rd, scratch);
1416            }
1417            // mov ip, hi(imm64)
1418            // vmov d0[1], ip
1419            {
1420              UseScratchRegisterScope temps(this);
1421              Register scratch = temps.Acquire();
1422              {
1423                // TODO: The scope length was measured empirically. We should
1424                // analyse the
1425                // worst-case size and add targetted tests.
1426                CodeBufferCheckScope scope(this,
1427                                           2 * kMaxInstructionSizeInBytes);
1428                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1429              }
1430              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1431              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1432            }
1433            return;
1434          }
1435          break;
1436        default:
1437          break;
1438      }
1439      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1440      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1441        // mov ip, imm32
1442        // vdup.16 d0, ip
1443        UseScratchRegisterScope temps(this);
1444        Register scratch = temps.Acquire();
1445        {
1446          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1447          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1448        }
1449        DataTypeValue vdup_dt = Untyped32;
1450        switch (dt.GetValue()) {
1451          case I16:
1452            vdup_dt = Untyped16;
1453            break;
1454          case I32:
1455            vdup_dt = Untyped32;
1456            break;
1457          default:
1458            VIXL_UNREACHABLE();
1459        }
1460        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1461        vdup(cond, vdup_dt, rd, scratch);
1462        return;
1463      }
1464      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1465        float f = neon_imm.GetImmediate<float>();
1466        // Punt to vmov.i32
1467        // TODO: The scope length was guessed based on the double case below. We
1468        // should analyse the worst-case size and add targetted tests.
1469        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1470        vmov(cond, I32, rd, FloatToRawbits(f));
1471        return;
1472      }
1473      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1474        // Punt to vmov.i64
1475        double d = neon_imm.GetImmediate<double>();
1476        // TODO: The scope length was measured empirically. We should analyse
1477        // the
1478        // worst-case size and add targetted tests.
1479        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1480        vmov(cond, I64, rd, DoubleToRawbits(d));
1481        return;
1482      }
1483    }
1484  }
1485  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1486}
1487
1488
1489void MacroAssembler::Delegate(InstructionType type,
1490                              InstructionCondDtQQop instruction,
1491                              Condition cond,
1492                              DataType dt,
1493                              QRegister rd,
1494                              const QOperand& operand) {
1495  CONTEXT_SCOPE;
1496  if (type == kVmov) {
1497    if (operand.IsImmediate()) {
1498      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1499      switch (dt.GetValue()) {
1500        case I32:
1501          if (neon_imm.CanConvert<uint32_t>()) {
1502            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1503            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1504            if (IsI8BitPattern(imm)) {
1505              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1506              vmov(cond, I8, rd, imm & 0xff);
1507              return;
1508            }
1509            // vmov.i32 d0, 0xff0000ff will translate into
1510            // vmov.i64 d0, 0xff0000ffff0000ff
1511            if (IsI64BitPattern(imm)) {
1512              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1513              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1514              return;
1515            }
1516            // vmov.i32 d0, 0xffab0000 will translate into
1517            // vmvn.i32 d0, 0x0054ffff
1518            if (CanBeInverted(imm)) {
1519              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1520              vmvn(cond, I32, rd, ~imm);
1521              return;
1522            }
1523          }
1524          break;
1525        case I16:
1526          if (neon_imm.CanConvert<uint16_t>()) {
1527            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1528            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1529            if (IsI8BitPattern(imm)) {
1530              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1531              vmov(cond, I8, rd, imm & 0xff);
1532              return;
1533            }
1534          }
1535          break;
1536        case I64:
1537          if (neon_imm.CanConvert<uint64_t>()) {
1538            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
            // vmov.i64 q0, -1 will translate into vmov.i8 q0, 0xff
1540            if (IsI8BitPattern(imm)) {
1541              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1542              vmov(cond, I8, rd, imm & 0xff);
1543              return;
1544            }
            // mov ip, lo(imm64)
            // vdup.32 q0, ip
            // vdup is preferred to 'vmov d0[0]' as the other lanes of q0 do
            // not need to be preserved.
1549            {
1550              UseScratchRegisterScope temps(this);
1551              Register scratch = temps.Acquire();
1552              {
1553                CodeBufferCheckScope scope(this,
1554                                           2 * kMaxInstructionSizeInBytes);
1555                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1556              }
1557              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1558              vdup(cond, Untyped32, rd, scratch);
1559            }
1560            // mov ip, hi(imm64)
            // vmov.32 d0[1], ip
1562            // vmov d1, d0
1563            {
1564              UseScratchRegisterScope temps(this);
1565              Register scratch = temps.Acquire();
1566              {
1567                CodeBufferCheckScope scope(this,
1568                                           2 * kMaxInstructionSizeInBytes);
1569                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1570              }
1571              {
1572                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1573                vmov(cond,
1574                     Untyped32,
1575                     DRegisterLane(rd.GetLowDRegister(), 1),
1576                     scratch);
1577              }
1578              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1579              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1580            }
1581            return;
1582          }
1583          break;
1584        default:
1585          break;
1586      }
1587      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1588      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
        // mov ip, imm32
        // vdup.16 q0, ip (vdup.32 for I32)
1591        UseScratchRegisterScope temps(this);
1592        Register scratch = temps.Acquire();
1593        {
1594          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1595          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1596        }
1597        DataTypeValue vdup_dt = Untyped32;
1598        switch (dt.GetValue()) {
1599          case I16:
1600            vdup_dt = Untyped16;
1601            break;
1602          case I32:
1603            vdup_dt = Untyped32;
1604            break;
1605          default:
1606            VIXL_UNREACHABLE();
1607        }
1608        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1609        vdup(cond, vdup_dt, rd, scratch);
1610        return;
1611      }
1612      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
        // Punt to vmov.i32
1614        float f = neon_imm.GetImmediate<float>();
1615        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1616        vmov(cond, I32, rd, FloatToRawbits(f));
1617        return;
1618      }
1619      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1620        // Use vmov to create the double in the low D register, then duplicate
1621        // it into the high D register.
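        // The vmov into the low D register may itself be expanded by the
        // macro assembler (up to six instructions, as in the D-register
        // immediate case above), hence the seven-instruction scope.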
1622        double d = neon_imm.GetImmediate<double>();
1623        CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
1624        vmov(cond, F64, rd.GetLowDRegister(), d);
1625        vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1626        return;
1627      }
1628    }
1629  }
1630  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1631}
1632
1633
1634void MacroAssembler::Delegate(InstructionType type,
1635                              InstructionCondRL instruction,
1636                              Condition cond,
1637                              Register rt,
1638                              Location* location) {
1639  VIXL_ASSERT((type == kLdrb) || (type == kLdrh) || (type == kLdrsb) ||
1640              (type == kLdrsh));
1641
1642  CONTEXT_SCOPE;
1643
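  // The narrow literal loads have a limited pc-relative range. If the label
  // is already bound, compute (most of) its address into a scratch register
  // (the destination can be reused for this) and load using the remaining
  // offset.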
1644  if (location->IsBound()) {
1645    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
1646    UseScratchRegisterScope temps(this);
1647    temps.Include(rt);
1648    Register scratch = temps.Acquire();
1649    uint32_t mask = GetOffsetMask(type, Offset);
1650    switch (type) {
1651      case kLdrb:
1652        ldrb(rt, MemOperandComputationHelper(cond, scratch, location, mask));
1653        return;
1654      case kLdrh:
1655        ldrh(rt, MemOperandComputationHelper(cond, scratch, location, mask));
1656        return;
1657      case kLdrsb:
1658        ldrsb(rt, MemOperandComputationHelper(cond, scratch, location, mask));
1659        return;
1660      case kLdrsh:
1661        ldrsh(rt, MemOperandComputationHelper(cond, scratch, location, mask));
1662        return;
1663      default:
1664        VIXL_UNREACHABLE();
1665    }
1666    return;
1667  }
1668
1669  Assembler::Delegate(type, instruction, cond, rt, location);
1670}
1671
1672
1673void MacroAssembler::Delegate(InstructionType type,
1674                              InstructionCondRRL instruction,
1675                              Condition cond,
1676                              Register rt,
1677                              Register rt2,
1678                              Location* location) {
1679  VIXL_ASSERT(type == kLdrd);
1680
1681  CONTEXT_SCOPE;
1682
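  // As for the narrow literal loads above, a bound out-of-range label is
  // reached by computing its address into a scratch register (one of the
  // destination registers can be reused) and issuing a single ldrd from it.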
1683  if (location->IsBound()) {
1684    CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1685    UseScratchRegisterScope temps(this);
1686    temps.Include(rt, rt2);
1687    Register scratch = temps.Acquire();
1688    uint32_t mask = GetOffsetMask(type, Offset);
1689    ldrd(rt, rt2, MemOperandComputationHelper(cond, scratch, location, mask));
1690    return;
1691  }
1692
1693  Assembler::Delegate(type, instruction, cond, rt, rt2, location);
1694}
1695
1696
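// Delegate for single-register loads and stores whose MemOperand cannot be
// encoded directly. The part of the offset that does not fit in the
// instruction's offset field is applied with an explicit add/sub, using a
// scratch register whenever the addressing mode requires the base register
// to be preserved.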
1697void MacroAssembler::Delegate(InstructionType type,
1698                              InstructionCondSizeRMop instruction,
1699                              Condition cond,
1700                              EncodingSize size,
1701                              Register rd,
1702                              const MemOperand& operand) {
1703  CONTEXT_SCOPE;
1704  VIXL_ASSERT(size.IsBest());
1705  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
1706              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
1707              (type == kStrb) || (type == kStrh));
1708  if (operand.IsImmediate()) {
1709    const Register& rn = operand.GetBaseRegister();
1710    AddrMode addrmode = operand.GetAddrMode();
1711    int32_t offset = operand.GetOffsetImmediate();
1712    uint32_t extra_offset_mask = GetOffsetMask(type, addrmode);
1713    // Try to maximize the offset used by the MemOperand (load_store_offset).
1714    // Add the part which can't be used by the MemOperand (add_offset).
1715    uint32_t load_store_offset = offset & extra_offset_mask;
1716    uint32_t add_offset = offset & ~extra_offset_mask;
1717    if ((add_offset != 0) &&
1718        (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
1719      load_store_offset = 0;
1720      add_offset = offset;
1721    }
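    // For example, assuming a 12-bit offset mask, ldr r0, [r1, #0x12345]
    // splits into add_offset = 0x12000 and load_store_offset = 0x345.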
1722    switch (addrmode) {
1723      case PreIndex:
1724        // Avoid the unpredictable case 'str r0, [r0, imm]!'
1725        if (!rn.Is(rd)) {
1726          // Pre-Indexed case:
1727          // ldr r0, [r1, 12345]! will translate into
1728          //   add r1, r1, 12345
1729          //   ldr r0, [r1]
1730          {
1731            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1732            add(cond, rn, rn, add_offset);
1733          }
1734          {
1735            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1736            (this->*instruction)(cond,
1737                                 size,
1738                                 rd,
1739                                 MemOperand(rn, load_store_offset, PreIndex));
1740          }
1741          return;
1742        }
1743        break;
1744      case Offset: {
1745        UseScratchRegisterScope temps(this);
1746        // Allow using the destination as a scratch register if possible.
1747        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1748            !rd.Is(rn)) {
1749          temps.Include(rd);
1750        }
1751        Register scratch = temps.Acquire();
1752        // Offset case:
1753        // ldr r0, [r1, 12345] will translate into
1754        //   add r0, r1, 12345
1755        //   ldr r0, [r0]
1756        {
1757          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1758          add(cond, scratch, rn, add_offset);
1759        }
1760        {
1761          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1762          (this->*instruction)(cond,
1763                               size,
1764                               rd,
1765                               MemOperand(scratch, load_store_offset));
1766        }
1767        return;
1768      }
1769      case PostIndex:
1770        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1771        if (!rn.Is(rd)) {
1772          // Post-indexed case:
          // ldr r0, [r1], imm32 will translate into
          //   ldr r0, [r1]
          //   movw ip, imm32 & 0xffff
          //   movt ip, imm32 >> 16
1777          //   add r1, r1, ip
1778          {
1779            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1780            (this->*instruction)(cond,
1781                                 size,
1782                                 rd,
1783                                 MemOperand(rn, load_store_offset, PostIndex));
1784          }
1785          {
1786            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1787            add(cond, rn, rn, add_offset);
1788          }
1789          return;
1790        }
1791        break;
1792    }
1793  } else if (operand.IsPlainRegister()) {
1794    const Register& rn = operand.GetBaseRegister();
1795    AddrMode addrmode = operand.GetAddrMode();
1796    const Register& rm = operand.GetOffsetRegister();
1797    if (rm.IsPC()) {
1798      VIXL_ABORT_WITH_MSG(
1799          "The MacroAssembler does not convert loads and stores with a PC "
1800          "offset register.\n");
1801    }
1802    if (rn.IsPC()) {
1803      if (addrmode == Offset) {
1804        if (IsUsingT32()) {
1805          VIXL_ABORT_WITH_MSG(
1806              "The MacroAssembler does not convert loads and stores with a PC "
1807              "base register for T32.\n");
1808        }
1809      } else {
1810        VIXL_ABORT_WITH_MSG(
1811            "The MacroAssembler does not convert loads and stores with a PC "
1812            "base register in pre-index or post-index mode.\n");
1813      }
1814    }
1815    switch (addrmode) {
1816      case PreIndex:
1817        // Avoid the unpredictable case 'str r0, [r0, imm]!'
1818        if (!rn.Is(rd)) {
1819          // Pre-Indexed case:
1820          // ldr r0, [r1, r2]! will translate into
1821          //   add r1, r1, r2
1822          //   ldr r0, [r1]
1823          {
1824            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1825            if (operand.GetSign().IsPlus()) {
1826              add(cond, rn, rn, rm);
1827            } else {
1828              sub(cond, rn, rn, rm);
1829            }
1830          }
1831          {
1832            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1833            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1834          }
1835          return;
1836        }
1837        break;
1838      case Offset: {
1839        UseScratchRegisterScope temps(this);
1840        // Allow using the destination as a scratch register if this is not a
1841        // store.
1842        // Avoid using PC as a temporary as this has side-effects.
1843        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1844            !rd.IsPC()) {
1845          temps.Include(rd);
1846        }
1847        Register scratch = temps.Acquire();
1848        // Offset case:
1849        // ldr r0, [r1, r2] will translate into
1850        //   add r0, r1, r2
1851        //   ldr r0, [r0]
1852        {
1853          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1854          if (operand.GetSign().IsPlus()) {
1855            add(cond, scratch, rn, rm);
1856          } else {
1857            sub(cond, scratch, rn, rm);
1858          }
1859        }
1860        {
1861          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1862          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1863        }
1864        return;
1865      }
1866      case PostIndex:
1867        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1868        if (!rn.Is(rd)) {
1869          // Post-indexed case:
          // ldr r0, [r1], r2 will translate into
1871          //   ldr r0, [r1]
1872          //   add r1, r1, r2
1873          {
1874            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1875            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1876          }
1877          {
1878            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1879            if (operand.GetSign().IsPlus()) {
1880              add(cond, rn, rn, rm);
1881            } else {
1882              sub(cond, rn, rn, rm);
1883            }
1884          }
1885          return;
1886        }
1887        break;
1888    }
1889  }
1890  Assembler::Delegate(type, instruction, cond, size, rd, operand);
1891}
1892
1893
1894void MacroAssembler::Delegate(InstructionType type,
1895                              InstructionCondRRMop instruction,
1896                              Condition cond,
1897                              Register rt,
1898                              Register rt2,
1899                              const MemOperand& operand) {
1900  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
1901      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
1902      (type == kStrexb) || (type == kStrexh)) {
1903    UnimplementedDelegate(type);
1904    return;
1905  }
1906
1907  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
1908
1909  CONTEXT_SCOPE;
1910
1911  // TODO: Should we allow these cases?
1912  if (IsUsingA32()) {
1913    // The first register needs to be even.
1914    if ((rt.GetCode() & 1) != 0) {
1915      UnimplementedDelegate(type);
1916      return;
1917    }
1918    // Registers need to be adjacent.
1919    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
1920      UnimplementedDelegate(type);
1921      return;
1922    }
    // LDRD lr, pc, [...] is not allowed.
1924    if (rt.Is(lr)) {
1925      UnimplementedDelegate(type);
1926      return;
1927    }
1928  }
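  // For example, on A32 'Ldrd(r1, r2, ...)' (odd first register) and
  // 'Ldrd(r0, r2, ...)' (non-adjacent registers) are reported as
  // unimplemented rather than being split into two loads.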
1929
1930  if (operand.IsImmediate()) {
1931    const Register& rn = operand.GetBaseRegister();
1932    AddrMode addrmode = operand.GetAddrMode();
1933    int32_t offset = operand.GetOffsetImmediate();
1934    uint32_t extra_offset_mask = GetOffsetMask(type, addrmode);
1935    // Try to maximize the offset used by the MemOperand (load_store_offset).
1936    // Add the part which can't be used by the MemOperand (add_offset).
1937    uint32_t load_store_offset = offset & extra_offset_mask;
1938    uint32_t add_offset = offset & ~extra_offset_mask;
1939    if ((add_offset != 0) &&
1940        (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
1941      load_store_offset = 0;
1942      add_offset = offset;
1943    }
1944    switch (addrmode) {
1945      case PreIndex: {
        // Allow using the destinations as scratch registers if possible.
1947        UseScratchRegisterScope temps(this);
1948        if (type == kLdrd) {
1949          if (!rt.Is(rn)) temps.Include(rt);
1950          if (!rt2.Is(rn)) temps.Include(rt2);
1951        }
1952
1953        // Pre-Indexed case:
1954        // ldrd r0, r1, [r2, 12345]! will translate into
1955        //   add r2, 12345
1956        //   ldrd r0, r1, [r2]
1957        {
1958          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1959          add(cond, rn, rn, add_offset);
1960        }
1961        {
1962          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1963          (this->*instruction)(cond,
1964                               rt,
1965                               rt2,
1966                               MemOperand(rn, load_store_offset, PreIndex));
1967        }
1968        return;
1969      }
1970      case Offset: {
1971        UseScratchRegisterScope temps(this);
        // Allow using the destinations as scratch registers if possible.
1973        if (type == kLdrd) {
1974          if (!rt.Is(rn)) temps.Include(rt);
1975          if (!rt2.Is(rn)) temps.Include(rt2);
1976        }
1977        Register scratch = temps.Acquire();
1978        // Offset case:
1979        // ldrd r0, r1, [r2, 12345] will translate into
1980        //   add r0, r2, 12345
1981        //   ldrd r0, r1, [r0]
1982        {
1983          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1984          add(cond, scratch, rn, add_offset);
1985        }
1986        {
1987          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1988          (this->*instruction)(cond,
1989                               rt,
1990                               rt2,
1991                               MemOperand(scratch, load_store_offset));
1992        }
1993        return;
1994      }
1995      case PostIndex:
1996        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
1997        if (!rn.Is(rt) && !rn.Is(rt2)) {
1998          // Post-indexed case:
1999          // ldrd r0, r1, [r2], imm32 will translate into
2000          //   ldrd r0, r1, [r2]
          //   movw ip, imm32 & 0xffff
2002          //   movt ip, imm32 >> 16
2003          //   add r2, ip
2004          {
2005            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2006            (this->*instruction)(cond,
2007                                 rt,
2008                                 rt2,
2009                                 MemOperand(rn, load_store_offset, PostIndex));
2010          }
2011          {
2012            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2013            add(cond, rn, rn, add_offset);
2014          }
2015          return;
2016        }
2017        break;
2018    }
2019  }
2020  if (operand.IsPlainRegister()) {
2021    const Register& rn = operand.GetBaseRegister();
2022    const Register& rm = operand.GetOffsetRegister();
2023    AddrMode addrmode = operand.GetAddrMode();
2024    switch (addrmode) {
2025      case PreIndex:
2026        // ldrd r0, r1, [r2, r3]! will translate into
2027        //   add r2, r3
2028        //   ldrd r0, r1, [r2]
2029        {
2030          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2031          if (operand.GetSign().IsPlus()) {
2032            add(cond, rn, rn, rm);
2033          } else {
2034            sub(cond, rn, rn, rm);
2035          }
2036        }
2037        {
2038          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2039          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2040        }
2041        return;
2042      case PostIndex:
2043        // ldrd r0, r1, [r2], r3 will translate into
2044        //   ldrd r0, r1, [r2]
2045        //   add r2, r3
2046        {
2047          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2048          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2049        }
2050        {
2051          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2052          if (operand.GetSign().IsPlus()) {
2053            add(cond, rn, rn, rm);
2054          } else {
2055            sub(cond, rn, rn, rm);
2056          }
2057        }
2058        return;
2059      case Offset: {
2060        UseScratchRegisterScope temps(this);
        // Allow using the destinations as scratch registers if possible.
2062        if (type == kLdrd) {
2063          if (!rt.Is(rn)) temps.Include(rt);
2064          if (!rt2.Is(rn)) temps.Include(rt2);
2065        }
2066        Register scratch = temps.Acquire();
2067        // Offset case:
2068        // ldrd r0, r1, [r2, r3] will translate into
2069        //   add r0, r2, r3
2070        //   ldrd r0, r1, [r0]
2071        {
2072          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2073          if (operand.GetSign().IsPlus()) {
2074            add(cond, scratch, rn, rm);
2075          } else {
2076            sub(cond, scratch, rn, rm);
2077          }
2078        }
2079        {
2080          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2081          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2082        }
2083        return;
2084      }
2085    }
2086  }
2087  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2088}
2089
2090
2091void MacroAssembler::Delegate(InstructionType type,
2092                              InstructionCondDtSMop instruction,
2093                              Condition cond,
2094                              DataType dt,
2095                              SRegister rd,
2096                              const MemOperand& operand) {
2097  CONTEXT_SCOPE;
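  // vldr and vstr only support plain offset addressing with a limited
  // immediate range, so larger offsets and the pre- and post-indexed forms
  // are emulated with an explicit add, using a scratch register when the
  // base register must be preserved.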
2098  if (operand.IsImmediate()) {
2099    const Register& rn = operand.GetBaseRegister();
2100    AddrMode addrmode = operand.GetAddrMode();
2101    int32_t offset = operand.GetOffsetImmediate();
2102    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2103                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2104    if (rn.IsPC()) {
2105      VIXL_ABORT_WITH_MSG(
2106          "The MacroAssembler does not convert vldr or vstr with a PC base "
2107          "register.\n");
2108    }
2109    switch (addrmode) {
2110      case PreIndex:
2111        // Pre-Indexed case:
2112        // vldr.32 s0, [r1, 12345]! will translate into
2113        //   add r1, 12345
2114        //   vldr.32 s0, [r1]
2115        if (offset != 0) {
2116          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2117          add(cond, rn, rn, offset);
2118        }
2119        {
2120          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2121          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2122        }
2123        return;
2124      case Offset: {
2125        UseScratchRegisterScope temps(this);
2126        Register scratch = temps.Acquire();
2127        // Offset case:
2128        // vldr.32 s0, [r1, 12345] will translate into
2129        //   add ip, r1, 12345
2130        //   vldr.32 s0, [ip]
2131        {
2132          VIXL_ASSERT(offset != 0);
2133          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2134          add(cond, scratch, rn, offset);
2135        }
2136        {
2137          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2138          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2139        }
2140        return;
2141      }
2142      case PostIndex:
2143        // Post-indexed case:
2144        // vldr.32 s0, [r1], imm32 will translate into
2145        //   vldr.32 s0, [r1]
        //   movw ip, imm32 & 0xffff
2147        //   movt ip, imm32 >> 16
2148        //   add r1, ip
2149        {
2150          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2151          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2152        }
2153        if (offset != 0) {
2154          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2155          add(cond, rn, rn, offset);
2156        }
2157        return;
2158    }
2159  }
2160  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2161}
2162
2163
2164void MacroAssembler::Delegate(InstructionType type,
2165                              InstructionCondDtDMop instruction,
2166                              Condition cond,
2167                              DataType dt,
2168                              DRegister rd,
2169                              const MemOperand& operand) {
2170  CONTEXT_SCOPE;
2171  if (operand.IsImmediate()) {
2172    const Register& rn = operand.GetBaseRegister();
2173    AddrMode addrmode = operand.GetAddrMode();
2174    int32_t offset = operand.GetOffsetImmediate();
2175    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2176                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2177    if (rn.IsPC()) {
2178      VIXL_ABORT_WITH_MSG(
2179          "The MacroAssembler does not convert vldr or vstr with a PC base "
2180          "register.\n");
2181    }
2182    switch (addrmode) {
2183      case PreIndex:
2184        // Pre-Indexed case:
2185        // vldr.64 d0, [r1, 12345]! will translate into
2186        //   add r1, 12345
2187        //   vldr.64 d0, [r1]
2188        if (offset != 0) {
2189          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2190          add(cond, rn, rn, offset);
2191        }
2192        {
2193          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2194          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2195        }
2196        return;
2197      case Offset: {
2198        UseScratchRegisterScope temps(this);
2199        Register scratch = temps.Acquire();
2200        // Offset case:
2201        // vldr.64 d0, [r1, 12345] will translate into
2202        //   add ip, r1, 12345
        //   vldr.64 d0, [ip]
2204        {
2205          VIXL_ASSERT(offset != 0);
2206          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2207          add(cond, scratch, rn, offset);
2208        }
2209        {
2210          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2211          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2212        }
2213        return;
2214      }
2215      case PostIndex:
2216        // Post-indexed case:
        // vldr.64 d0, [r1], imm32 will translate into
        //   vldr.64 d0, [r1]
        //   movw ip, imm32 & 0xffff
2220        //   movt ip, imm32 >> 16
2221        //   add r1, ip
2222        {
2223          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2224          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2225        }
2226        if (offset != 0) {
2227          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2228          add(cond, rn, rn, offset);
2229        }
2230        return;
2231    }
2232  }
2233  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2234}
2235
2236
2237void MacroAssembler::Delegate(InstructionType type,
2238                              InstructionCondMsrOp instruction,
2239                              Condition cond,
2240                              MaskedSpecialRegister spec_reg,
2241                              const Operand& operand) {
2242  USE(type);
2243  VIXL_ASSERT(type == kMsr);
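  // An immediate that the assembler could not encode in msr directly is
  // first materialised in a scratch register, e.g.
  //   msr spec_reg, #imm will translate into
  //     mov ip, #imm   (up to two instructions, hence the scope below)
  //     msr spec_reg, ip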
2244  if (operand.IsImmediate()) {
2245    UseScratchRegisterScope temps(this);
2246    Register scratch = temps.Acquire();
2247    {
2248      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2249      mov(cond, scratch, operand);
2250    }
2251    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2252    msr(cond, spec_reg, scratch);
2253    return;
2254  }
2255  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2256}
2257
2258
2259void MacroAssembler::Delegate(InstructionType type,
2260                              InstructionCondDtDL instruction,
2261                              Condition cond,
2262                              DataType dt,
2263                              DRegister rd,
2264                              Location* location) {
2265  VIXL_ASSERT(type == kVldr);
2266
2267  CONTEXT_SCOPE;
2268
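  // vldr can only reach a literal within a limited pc-relative range. For a
  // bound location, compute most of the pc-relative offset into a scratch
  // register and use the remainder as the vldr immediate offset.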
2269  if (location->IsBound()) {
2270    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2271    UseScratchRegisterScope temps(this);
2272    Register scratch = temps.Acquire();
2273    uint32_t mask = GetOffsetMask(type, Offset);
2274    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, location, mask));
2275    return;
2276  }
2277
2278  Assembler::Delegate(type, instruction, cond, dt, rd, location);
2279}
2280
2281
2282void MacroAssembler::Delegate(InstructionType type,
2283                              InstructionCondDtSL instruction,
2284                              Condition cond,
2285                              DataType dt,
2286                              SRegister rd,
2287                              Location* location) {
2288  VIXL_ASSERT(type == kVldr);
2289
2290  CONTEXT_SCOPE;
2291
2292  if (location->IsBound()) {
2293    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2294    UseScratchRegisterScope temps(this);
2295    Register scratch = temps.Acquire();
2296    uint32_t mask = GetOffsetMask(type, Offset);
2297    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, location, mask));
2298    return;
2299  }
2300
2301  Assembler::Delegate(type, instruction, cond, dt, rd, location);
2302}
2303
2304
2305#undef CONTEXT_SCOPE
2306#undef TOSTRING
2307#undef STRINGIFY
2308
2309// Start of generated code.
2310// End of generated code.
2311}  // namespace aarch32
2312}  // namespace vixl
2313