macro-assembler-aarch32.cc revision e42218c6ca969b7d4032da978fb05f06641df100
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30namespace vixl {
31namespace aarch32 {
32
33void UseScratchRegisterScope::Open(MacroAssembler* masm) {
34  VIXL_ASSERT((available_ == NULL) && (available_vfp_ == NULL));
35  available_ = masm->GetScratchRegisterList();
36  old_available_ = available_->GetList();
37  available_vfp_ = masm->GetScratchVRegisterList();
38  old_available_vfp_ = available_vfp_->GetList();
39}
40
41
42void UseScratchRegisterScope::Close() {
43  if (available_ != NULL) {
44    available_->SetList(old_available_);
45    available_ = NULL;
46  }
47  if (available_vfp_ != NULL) {
48    available_vfp_->SetList(old_available_vfp_);
49    available_vfp_ = NULL;
50  }
51}
52
53
54bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
55  VIXL_ASSERT(available_ != NULL);
56  VIXL_ASSERT(reg.IsValid());
57  return available_->Includes(reg);
58}
59
60
61bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
62  VIXL_ASSERT(available_vfp_ != NULL);
63  VIXL_ASSERT(reg.IsValid());
64  return available_vfp_->Includes(reg);
65}
66
67
68Register UseScratchRegisterScope::Acquire() {
69  VIXL_ASSERT(available_ != NULL);
70  VIXL_CHECK(!available_->IsEmpty());
71  Register reg = available_->GetFirstAvailableRegister();
72  available_->Remove(reg);
73  return reg;
74}
75
76
77VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
78  switch (size_in_bits) {
79    case kSRegSizeInBits:
80      return AcquireS();
81    case kDRegSizeInBits:
82      return AcquireD();
83    case kQRegSizeInBits:
84      return AcquireQ();
85    default:
86      VIXL_UNREACHABLE();
87      return NoVReg;
88  }
89}
90
91
92QRegister UseScratchRegisterScope::AcquireQ() {
93  VIXL_ASSERT(available_vfp_ != NULL);
94  VIXL_CHECK(!available_vfp_->IsEmpty());
95  QRegister reg = available_vfp_->GetFirstAvailableQRegister();
96  available_vfp_->Remove(reg);
97  return reg;
98}
99
100
101DRegister UseScratchRegisterScope::AcquireD() {
102  VIXL_ASSERT(available_vfp_ != NULL);
103  VIXL_CHECK(!available_vfp_->IsEmpty());
104  DRegister reg = available_vfp_->GetFirstAvailableDRegister();
105  available_vfp_->Remove(reg);
106  return reg;
107}
108
109
110SRegister UseScratchRegisterScope::AcquireS() {
111  VIXL_ASSERT(available_vfp_ != NULL);
112  VIXL_CHECK(!available_vfp_->IsEmpty());
113  SRegister reg = available_vfp_->GetFirstAvailableSRegister();
114  available_vfp_->Remove(reg);
115  return reg;
116}
117
118
119void UseScratchRegisterScope::Release(const Register& reg) {
120  VIXL_ASSERT(available_ != NULL);
121  VIXL_ASSERT(reg.IsValid());
122  VIXL_ASSERT(!available_->Includes(reg));
123  available_->Combine(reg);
124}
125
126
127void UseScratchRegisterScope::Release(const VRegister& reg) {
128  VIXL_ASSERT(available_vfp_ != NULL);
129  VIXL_ASSERT(reg.IsValid());
130  VIXL_ASSERT(!available_vfp_->Includes(reg));
131  available_vfp_->Combine(reg);
132}
133
134
135void UseScratchRegisterScope::Include(const RegisterList& list) {
136  VIXL_ASSERT(available_ != NULL);
137  RegisterList excluded_registers(sp, lr, pc);
138  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
139  available_->SetList(available_->GetList() | mask);
140}
141
142
143void UseScratchRegisterScope::Include(const VRegisterList& list) {
144  VIXL_ASSERT(available_vfp_ != NULL);
145  available_vfp_->SetList(available_vfp_->GetList() | list.GetList());
146}
147
148
149void UseScratchRegisterScope::Exclude(const RegisterList& list) {
150  VIXL_ASSERT(available_ != NULL);
151  available_->SetList(available_->GetList() & ~list.GetList());
152}
153
154
155void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
156  VIXL_ASSERT(available_vfp_ != NULL);
157  available_vfp_->SetList(available_vfp_->GetList() & ~list.GetList());
158}
159
160
161void UseScratchRegisterScope::ExcludeAll() {
162  if (available_ != NULL) {
163    available_->SetList(0);
164  }
165  if (available_vfp_ != NULL) {
166    available_vfp_->SetList(0);
167  }
168}
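
// A minimal usage sketch of the UseScratchRegisterScope helpers above
// (illustrative only; the register actually handed out depends on the
// MacroAssembler's current scratch register list, typically ip/r12):
//
//   {
//     UseScratchRegisterScope temps(&masm);
//     Register scratch = temps.Acquire();      // e.g. ip
//     SRegister s_scratch = temps.AcquireS();  // a scratch VFP register
//     // ... use the scratch registers ...
//   }  // The original scratch register lists are restored here.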
169
170
171void MacroAssembler::VeneerPoolManager::RemoveLabel(Label* label) {
172  label->ResetInVeneerPool();
173  if (label->GetCheckpoint() == checkpoint_) {
174    // We have to compute checkpoint again.
175    checkpoint_ = Label::kMaxOffset;
176    for (std::list<Label*>::iterator it = labels_.begin();
177         it != labels_.end();) {
178      if (*it == label) {
179        it = labels_.erase(it);
180      } else {
181        checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
182        ++it;
183      }
184    }
185    masm_->ComputeCheckpoint();
186  } else {
187    // We only have to remove the label from the list.
188    for (std::list<Label*>::iterator it = labels_.begin();; ++it) {
189      VIXL_ASSERT(it != labels_.end());
190      if (*it == label) {
191        labels_.erase(it);
192        break;
193      }
194    }
195  }
196}
197
198
199void MacroAssembler::VeneerPoolManager::Emit(Label::Offset target) {
200  checkpoint_ = Label::kMaxOffset;
201  // Sort the labels by their checkpoints so that no veneer
202  // goes out of range.
203  labels_.sort(Label::CompareLabels);
204  // To avoid too many veneers, generate veneers which will be necessary soon.
205  static const size_t kVeneerEmissionMargin = 1 * KBytes;
206  // To avoid too many veneers, also let branches which are not yet out of
207  // range, but close to it, use the veneers generated now.
208  static const size_t kVeneerEmittedMargin = 2 * KBytes;
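  // For example, with a given `target`, every label whose checkpoint lies
  // within kVeneerEmissionMargin (1KB) of it gets a veneer generated now, and
  // every branch to such a label whose own checkpoint lies within
  // kVeneerEmittedMargin (2KB) is redirected to that veneer.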
209  Label::Offset emitted_target = target + kVeneerEmittedMargin;
210  target += kVeneerEmissionMargin;
211  // Reset the checkpoint. It will be computed again in the loop.
212  checkpoint_ = Label::kMaxOffset;
213  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();) {
214    // The labels are sorted. As soon as a veneer is not needed, we can stop.
215    if ((*it)->GetCheckpoint() > target) {
216      checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
217      break;
218    }
219    // Define the veneer.
220    Label veneer;
221    masm_->Bind(&veneer);
222    Label::Offset label_checkpoint = Label::kMaxOffset;
223    // Check all uses of this label.
224    for (Label::ForwardRefList::iterator ref = (*it)->GetFirstForwardRef();
225         ref != (*it)->GetEndForwardRef();) {
226      if (ref->IsBranch()) {
227        if (ref->GetCheckpoint() <= emitted_target) {
228          // Use the veneer.
229          masm_->EncodeLabelFor(*ref, &veneer);
230          ref = (*it)->Erase(ref);
231        } else {
232          // Don't use the veneer => update checkpoint.
233          label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
234          ++ref;
235        }
236      } else {
237        ++ref;
238      }
239    }
240    // Even if we no longer have any use of this label, keep it in the list:
241    // the "B" generated just below will add it back.
242    (*it)->SetCheckpoint(label_checkpoint);
243    checkpoint_ = std::min(checkpoint_, label_checkpoint);
244    // Generate the veneer.
245    masm_->B(*it);
246    ++it;
247  }
248#ifdef VIXL_DEBUG
249  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();
250       ++it) {
251    VIXL_ASSERT((*it)->GetCheckpoint() >= checkpoint_);
252  }
253#endif
254  masm_->ComputeCheckpoint();
255}
256
257
258void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
259  EmitOption option = kBranchRequired;
260  Label after_pools;
261  if (target >= veneer_pool_manager_.GetCheckpoint()) {
262#ifdef VIXL_DEBUG
263    // Here, we can't use an AssemblerAccurateScope as it would call
264    // PerformEnsureEmit in an infinite loop.
265    bool save_assembler_state = AllowAssembler();
266    SetAllowAssembler(true);
267#endif
268    b(&after_pools);
269#ifdef VIXL_DEBUG
270    SetAllowAssembler(false);
271#endif
272    veneer_pool_manager_.Emit(target);
273    option = kNoBranchRequired;
274#ifdef VIXL_DEBUG
275    SetAllowAssembler(save_assembler_state);
276#endif
277  }
278  // Check if the macro-assembler's internal literal pool should be emitted
279  // to avoid any overflow. If we already generated the veneers, we can
280  // emit the pool (the branch is already done).
281  VIXL_ASSERT(GetCursorOffset() <= literal_pool_manager_.GetCheckpoint());
282  if ((target > literal_pool_manager_.GetCheckpoint()) ||
283      (option == kNoBranchRequired)) {
284    // We will generate the literal pool. Generate all the veneers which
285    // would become out of range.
286    size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize();
287    VIXL_ASSERT(IsInt32(literal_pool_size));
288    Label::Offset veneers_target =
289        target + static_cast<Label::Offset>(literal_pool_size);
290    VIXL_ASSERT(veneers_target >= 0);
291    if (veneers_target >= veneer_pool_manager_.GetCheckpoint()) {
292      veneer_pool_manager_.Emit(veneers_target);
293    }
294    EmitLiteralPool(option);
295  }
296  BindHelper(&after_pools);
297  if (GetBuffer()->IsManaged()) {
298    bool grow_requested;
299    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
300    if (grow_requested) ComputeCheckpoint();
301  }
302}
303
304
305void MacroAssembler::ComputeCheckpoint() {
306  checkpoint_ = veneer_pool_manager_.GetCheckpoint();
307  if (literal_pool_manager_.GetCheckpoint() != Label::kMaxOffset) {
308    size_t veneer_max_size = veneer_pool_manager_.GetMaxSize();
309    VIXL_ASSERT(IsInt32(veneer_max_size));
310    Label::Offset tmp = literal_pool_manager_.GetCheckpoint() -
311                        static_cast<Label::Offset>(veneer_max_size);
312    VIXL_ASSERT(tmp >= 0);
313    checkpoint_ = std::min(checkpoint_, tmp);
314  }
315  size_t buffer_size = GetBuffer()->GetCapacity();
316  VIXL_ASSERT(IsInt32(buffer_size));
317  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
318  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
319}
320
321
322void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
323  // 32-bit table A32:
324  // adr ip, table
325  // add ip, r1, lsl 2
326  // ldr ip, [ip]
327  // jmp: add pc, pc, ip, lsl 2
328  // table:
329  // .int (case_0 - (jmp + 8)) >> 2
330  // .int (case_1 - (jmp + 8)) >> 2
331  // .int (case_2 - (jmp + 8)) >> 2
332
333  // 16-bit table T32:
334  // adr ip, table
335  // jmp: tbh ip, r1
336  // table:
337  // .short (case_0 - (jmp + 4)) >> 1
338  // .short (case_1 - (jmp + 4)) >> 1
339  // .short (case_2 - (jmp + 4)) >> 1
340  // case_0:
341  //   ...
342  //   b end_switch
343  // case_1:
344  //   ...
345  //   b end_switch
346  // ...
347  // end_switch:
348  Label jump_table;
349  UseScratchRegisterScope temps(this);
350  Register scratch = temps.Acquire();
351  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
352
353  // Jump to the default case if reg is not in [0, table->GetLength()).
354  Cmp(reg, table->GetLength());
355  B(ge, table->GetDefaultLabel());
356
357  Adr(scratch, &jump_table);
358  if (IsUsingA32()) {
359    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
360    switch (table->GetOffsetShift()) {
361      case 0:
362        Ldrb(scratch, MemOperand(scratch));
363        break;
364      case 1:
365        Ldrh(scratch, MemOperand(scratch));
366        break;
367      case 2:
368        Ldr(scratch, MemOperand(scratch));
369        break;
370      default:
371        VIXL_ABORT_WITH_MSG("Unsupported jump table size");
372    }
373    // Emit whatever needs to be emitted if we want to
374    // correctly record the position of the branch instruction.
375    uint32_t branch_location = GetCursorOffset();
376    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
377    AssemblerAccurateScope scope(this,
378                                 table_size + kA32InstructionSizeInBytes,
379                                 CodeBufferCheckScope::kMaximumSize);
380    add(pc, pc, Operand(scratch, LSL, 2));
381    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
382    bind(&jump_table);
383    GenerateSwitchTable(table, table_size);
384  } else {
385    // Thumb mode - We have tbb and tbh to do this for 8- or 16-bit offsets.
386    // But for 32-bit offsets, we use the same encoding as for A32.
387    if (table->GetOffsetShift() == 2) {
388      // 32bit offsets
389      Add(scratch, scratch, Operand(reg, LSL, 2));
390      Ldr(scratch, MemOperand(scratch));
391      // Cannot use "add pc, pc, r, lsl #1" as this is unpredictable in T32,
392      // so do the shift beforehand.
393      Lsl(scratch, scratch, 1);
394      // Emit whatever needs to be emitted if we want to
395      // correctly record the position of the branch instruction.
396      uint32_t branch_location = GetCursorOffset();
397      table->SetBranchLocation(branch_location +
398                               GetArchitectureStatePCOffset());
399      AssemblerAccurateScope scope(this,
400                                   table_size + kMaxInstructionSizeInBytes,
401                                   CodeBufferCheckScope::kMaximumSize);
402      add(pc, pc, scratch);
403      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
404      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
405      bind(&jump_table);
406      GenerateSwitchTable(table, table_size);
407    } else {
408      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
409                  (table->GetOffsetShift() == 1));
410      // Emit whatever needs to be emitted if we want to
411      // correctly record the position of the branch instruction.
412      uint32_t branch_location = GetCursorOffset();
413      table->SetBranchLocation(branch_location +
414                               GetArchitectureStatePCOffset());
415      AssemblerAccurateScope scope(this,
416                                   table_size + kMaxInstructionSizeInBytes,
417                                   CodeBufferCheckScope::kMaximumSize);
418      if (table->GetOffsetShift() == 0) {
419        // 8bit offsets
420        tbb(scratch, reg);
421      } else {
422        // 16bit offsets
423        tbh(scratch, reg);
424      }
425      // tbb/tbh is a 32bit instruction
426      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
427      bind(&jump_table);
428      GenerateSwitchTable(table, table_size);
429    }
430  }
431}
432
433
434void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
435  table->BindTable(GetCursorOffset());
436  for (int i = 0; i < table_size / 4; i++) {
437    GetBuffer()->Emit32(0);
438  }
439}
440
441
442// switch/case/default : case
443// case_index is assumed to be < table->GetLength()
444// which is checked in JumpTable::Link and Table::SetPresenceBit
445void MacroAssembler::Case(JumpTableBase* table, int case_index) {
446  table->Link(this, case_index, GetCursorOffset());
447  table->SetPresenceBitForCase(case_index);
448}
449
450// switch/case/default : default
451void MacroAssembler::Default(JumpTableBase* table) {
452  Bind(table->GetDefaultLabel());
453}
454
455// switch/case/default : break
456void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
457
458// switch/case/default : finalize
459// Manage the default path, mostly. All empty offsets in the jump table
460// will point to default.
461// All values not in [0, table->GetLength()) are already pointing here anyway.
462void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
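
// A usage sketch for the switch/case helpers above (illustrative only; it
// assumes a concrete JumpTableBase implementation, here called
// JumpTable16bitOffset, constructed with the number of cases):
//
//   JumpTable16bitOffset table(3);
//   masm.Switch(r0, &table);  // Dispatch on the value held in r0.
//   masm.Case(&table, 0);
//   // ... code for case 0 ...
//   masm.Break(&table);
//   masm.Case(&table, 1);
//   // ... code for case 1 ...
//   masm.Break(&table);
//   masm.Default(&table);
//   // ... code for the default case ...
//   masm.EndSwitch(&table);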
463
464void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
465                                                Register tmp,
466                                                uint32_t imm) {
467  if (IsUintN(16, imm)) {
468    EnsureEmitFor(kMaxInstructionSizeInBytes);
469    mov(cond, tmp, imm & 0xffff);
470    return;
471  }
472  if (IsUsingT32()) {
473    if (ImmediateT32::IsImmediateT32(~imm)) {
474      EnsureEmitFor(kMaxInstructionSizeInBytes);
475      mvn(cond, tmp, ~imm);
476      return;
477    }
478  } else {
479    if (ImmediateA32::IsImmediateA32(~imm)) {
480      EnsureEmitFor(kMaxInstructionSizeInBytes);
481      mvn(cond, tmp, ~imm);
482      return;
483    }
484  }
485  EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
486  mov(cond, tmp, imm & 0xffff);
487  movt(cond, tmp, imm >> 16);
488}
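
// For example, HandleOutOfBoundsImmediate(al, r0, 0x12345678) reaches the
// final case above and emits the equivalent of:
//   movw r0, #0x5678
//   movt r0, #0x1234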
489
490
491HARDFLOAT void PrintfTrampolineRRRR(
492    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
493  printf(format, a, b, c, d);
494}
495
496
497HARDFLOAT void PrintfTrampolineRRRD(
498    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
499  printf(format, a, b, c, d);
500}
501
502
503HARDFLOAT void PrintfTrampolineRRDR(
504    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
505  printf(format, a, b, c, d);
506}
507
508
509HARDFLOAT void PrintfTrampolineRRDD(
510    const char* format, uint32_t a, uint32_t b, double c, double d) {
511  printf(format, a, b, c, d);
512}
513
514
515HARDFLOAT void PrintfTrampolineRDRR(
516    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
517  printf(format, a, b, c, d);
518}
519
520
521HARDFLOAT void PrintfTrampolineRDRD(
522    const char* format, uint32_t a, double b, uint32_t c, double d) {
523  printf(format, a, b, c, d);
524}
525
526
527HARDFLOAT void PrintfTrampolineRDDR(
528    const char* format, uint32_t a, double b, double c, uint32_t d) {
529  printf(format, a, b, c, d);
530}
531
532
533HARDFLOAT void PrintfTrampolineRDDD(
534    const char* format, uint32_t a, double b, double c, double d) {
535  printf(format, a, b, c, d);
536}
537
538
539HARDFLOAT void PrintfTrampolineDRRR(
540    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
541  printf(format, a, b, c, d);
542}
543
544
545HARDFLOAT void PrintfTrampolineDRRD(
546    const char* format, double a, uint32_t b, uint32_t c, double d) {
547  printf(format, a, b, c, d);
548}
549
550
551HARDFLOAT void PrintfTrampolineDRDR(
552    const char* format, double a, uint32_t b, double c, uint32_t d) {
553  printf(format, a, b, c, d);
554}
555
556
557HARDFLOAT void PrintfTrampolineDRDD(
558    const char* format, double a, uint32_t b, double c, double d) {
559  printf(format, a, b, c, d);
560}
561
562
563HARDFLOAT void PrintfTrampolineDDRR(
564    const char* format, double a, double b, uint32_t c, uint32_t d) {
565  printf(format, a, b, c, d);
566}
567
568
569HARDFLOAT void PrintfTrampolineDDRD(
570    const char* format, double a, double b, uint32_t c, double d) {
571  printf(format, a, b, c, d);
572}
573
574
575HARDFLOAT void PrintfTrampolineDDDR(
576    const char* format, double a, double b, double c, uint32_t d) {
577  printf(format, a, b, c, d);
578}
579
580
581HARDFLOAT void PrintfTrampolineDDDD(
582    const char* format, double a, double b, double c, double d) {
583  printf(format, a, b, c, d);
584}
585
586
587void MacroAssembler::Printf(const char* format,
588                            CPURegister reg1,
589                            CPURegister reg2,
590                            CPURegister reg3,
591                            CPURegister reg4) {
592  if (generate_simulator_code_) {
593    PushRegister(reg4);
594    PushRegister(reg3);
595    PushRegister(reg2);
596    PushRegister(reg1);
597    Push(RegisterList(r0, r1));
598    StringLiteral* format_literal =
599        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
600    Adr(r0, format_literal);
601    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
602                    (reg2.GetType() << 4) | reg1.GetType();
603    Mov(r1, args);
604    Hvc(kPrintfCode);
605    Pop(RegisterList(r0, r1));
606    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
607               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
608    Drop(size);
609  } else {
610    // Generating code for a native platform => 32-bit environment.
611    // Preserve the core registers r0-r3, r12, r14, plus r5 (stack alignment).
612    const uint32_t saved_registers_mask =
613        kCallerSavedRegistersMask | (1 << r5.GetCode());
614    Push(RegisterList(saved_registers_mask));
615    // Push VFP registers.
616    Vpush(Untyped64, DRegisterList(d0, 8));
617    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
618    // Find a register which has been saved and which doesn't need to be
619    // printed.
620    RegisterList available_registers(kCallerSavedRegistersMask);
621    if (reg1.GetType() == CPURegister::kRRegister) {
622      available_registers.Remove(Register(reg1.GetCode()));
623    }
624    if (reg2.GetType() == CPURegister::kRRegister) {
625      available_registers.Remove(Register(reg2.GetCode()));
626    }
627    if (reg3.GetType() == CPURegister::kRRegister) {
628      available_registers.Remove(Register(reg3.GetCode()));
629    }
630    if (reg4.GetType() == CPURegister::kRRegister) {
631      available_registers.Remove(Register(reg4.GetCode()));
632    }
633    Register tmp = available_registers.GetFirstAvailableRegister();
634    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
635    // Push the flags.
636    Mrs(tmp, APSR);
637    Push(tmp);
638    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
639    Push(tmp);
640    // Push the registers to print on the stack.
641    PushRegister(reg4);
642    PushRegister(reg3);
643    PushRegister(reg2);
644    PushRegister(reg1);
645    int core_count = 1;
646    int vfp_count = 0;
647    uint32_t printf_type = 0;
648    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
649    // Reg4 may stay on the stack if all the registers to print are core
650    // registers.
651    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
652    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
653    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
654    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
655    // Ensure that the stack is aligned on 8 bytes.
656    And(r5, sp, 0x7);
657    if (core_count == 5) {
658      // One 32-bit argument (reg4) has been left on the stack => align the
659      // stack before the argument.
661      Pop(r0);
662      Sub(sp, sp, r5);
663      Push(r0);
664    } else {
665      Sub(sp, sp, r5);
666    }
667    // Select the right trampoline depending on the arguments.
668    uintptr_t address;
669    switch (printf_type) {
670      case 0:
671        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
672        break;
673      case 1:
674        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
675        break;
676      case 2:
677        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
678        break;
679      case 3:
680        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
681        break;
682      case 4:
683        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
684        break;
685      case 5:
686        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
687        break;
688      case 6:
689        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
690        break;
691      case 7:
692        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
693        break;
694      case 8:
695        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
696        break;
697      case 9:
698        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
699        break;
700      case 10:
701        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
702        break;
703      case 11:
704        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
705        break;
706      case 12:
707        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
708        break;
709      case 13:
710        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
711        break;
712      case 14:
713        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
714        break;
715      case 15:
716        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
717        break;
718      default:
719        VIXL_UNREACHABLE();
720        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
721        break;
722    }
723    StringLiteral* format_literal =
724        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
725    Adr(r0, format_literal);
726    Mov(ip, Operand::From(address));
727    Blx(ip);
728    // If register reg4 was left on the stack => skip it.
729    if (core_count == 5) Drop(kRegSizeInBytes);
730    // Restore the stack as it was before alignment.
731    Add(sp, sp, r5);
732    // Restore the flags.
733    Pop(tmp);
734    Vmsr(FPSCR, tmp);
735    Pop(tmp);
736    Msr(APSR_nzcvqg, tmp);
737    // Restore the registers.
738    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
739    Vpop(Untyped64, DRegisterList(d0, 8));
740    Pop(RegisterList(saved_registers_mask));
741  }
742}
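
// A usage sketch for Printf above (illustrative only): each argument register
// must already hold the value matching its printf conversion, core registers
// for integral conversions and S/D registers for floating-point ones, e.g.
//
//   masm.Printf("x=%u y=%u a=%f b=%f\n", r0, r1, d0, d1);
//
// On native targets this example selects PrintfTrampolineRRDD; S register
// arguments are first converted to double by PreparePrintfArgument.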
743
744
745void MacroAssembler::PushRegister(CPURegister reg) {
746  switch (reg.GetType()) {
747    case CPURegister::kNoRegister:
748      break;
749    case CPURegister::kRRegister:
750      Push(Register(reg.GetCode()));
751      break;
752    case CPURegister::kSRegister:
753      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
754      break;
755    case CPURegister::kDRegister:
756      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
757      break;
758    case CPURegister::kQRegister:
759      VIXL_UNIMPLEMENTED();
760      break;
761  }
762}
763
764
765void MacroAssembler::PreparePrintfArgument(CPURegister reg,
766                                           int* core_count,
767                                           int* vfp_count,
768                                           uint32_t* printf_type) {
769  switch (reg.GetType()) {
770    case CPURegister::kNoRegister:
771      break;
772    case CPURegister::kRRegister:
773      VIXL_ASSERT(*core_count <= 4);
774      if (*core_count < 4) Pop(Register(*core_count));
775      *core_count += 1;
776      break;
777    case CPURegister::kSRegister:
778      VIXL_ASSERT(*vfp_count < 4);
779      *printf_type |= 1 << (*core_count + *vfp_count - 1);
780      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
781      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
782      *vfp_count += 1;
783      break;
784    case CPURegister::kDRegister:
785      VIXL_ASSERT(*vfp_count < 4);
786      *printf_type |= 1 << (*core_count + *vfp_count - 1);
787      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
788      *vfp_count += 1;
789      break;
790    case CPURegister::kQRegister:
791      VIXL_UNIMPLEMENTED();
792      break;
793  }
794}
795
796
797void MacroAssembler::Delegate(InstructionType type,
798                              InstructionCondROp instruction,
799                              Condition cond,
800                              Register rn,
801                              const Operand& operand) {
802  // add, movt, movw, sub, sxtb16, teq, uxtb16
803  ContextScope context(this);
804  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
805    InstructionCondRROp shiftop = NULL;
806    switch (operand.GetShift().GetType()) {
807      case LSL:
808        shiftop = &Assembler::lsl;
809        break;
810      case LSR:
811        shiftop = &Assembler::lsr;
812        break;
813      case ASR:
814        shiftop = &Assembler::asr;
815        break;
816      case RRX:
817        break;
818      case ROR:
819        shiftop = &Assembler::ror;
820        break;
821      default:
822        VIXL_UNREACHABLE();
823    }
824    if (shiftop != NULL) {
825      UseScratchRegisterScope temps(this);
826      Register scratch = temps.Acquire();
827      EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
828      (this->*shiftop)(cond,
829                       scratch,
830                       operand.GetBaseRegister(),
831                       operand.GetShiftRegister());
832      return (this->*instruction)(cond, rn, scratch);
833    }
834  }
835  if (operand.IsImmediate()) {
836    switch (type) {
837      case kTeq: {
838        UseScratchRegisterScope temps(this);
839        Register scratch = temps.Acquire();
840        HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
841        EnsureEmitFor(kMaxInstructionSizeInBytes);
842        teq(cond, rn, scratch);
843        return;
844      }
845      case kMovt:
846      case kMovw:
847      case kSxtb16:
848      case kUxtb16:
849        break;
850      default:
851        VIXL_UNREACHABLE();
852    }
853  }
854  Assembler::Delegate(type, instruction, cond, rn, operand);
855}
856
857
858void MacroAssembler::Delegate(InstructionType type,
859                              InstructionCondSizeROp instruction,
860                              Condition cond,
861                              EncodingSize size,
862                              Register rn,
863                              const Operand& operand) {
864  // cmn cmp mov movs mvn mvns sxtb sxth tst uxtb uxth
865  ContextScope context(this);
866  VIXL_ASSERT(size.IsBest());
867  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
868    InstructionCondRROp shiftop = NULL;
869    switch (operand.GetShift().GetType()) {
870      case LSL:
871        shiftop = &Assembler::lsl;
872        break;
873      case LSR:
874        shiftop = &Assembler::lsr;
875        break;
876      case ASR:
877        shiftop = &Assembler::asr;
878        break;
879      case RRX:
880        break;
881      case ROR:
882        shiftop = &Assembler::ror;
883        break;
884      default:
885        VIXL_UNREACHABLE();
886    }
887    if (shiftop != NULL) {
888      UseScratchRegisterScope temps(this);
889      Register scratch = temps.Acquire();
890      EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
891      (this->*shiftop)(cond,
892                       scratch,
893                       operand.GetBaseRegister(),
894                       operand.GetShiftRegister());
895      return (this->*instruction)(cond, size, rn, scratch);
896    }
897  }
898  if (operand.IsImmediate()) {
899    uint32_t imm = operand.GetImmediate();
900    switch (type) {
901      case kMov:
902      case kMovs:
903        if (!rn.IsPC()) {
904          // Immediate is too large, but not using PC, so handle with mov{t}.
905          HandleOutOfBoundsImmediate(cond, rn, imm);
906          if (type == kMovs) {
907            EnsureEmitFor(kMaxInstructionSizeInBytes);
908            tst(cond, rn, rn);
909          }
910          return;
911        } else {
912          // Immediate is too large and using PC, so handle using a temporary
913          // register.
914          UseScratchRegisterScope temps(this);
915          Register scratch = temps.Acquire();
916          HandleOutOfBoundsImmediate(cond, scratch, imm);
917          EnsureEmitFor(kMaxInstructionSizeInBytes);
918          // TODO: A bit of nonsense here! But should we fix 'mov pc, imm'
919          // anyway?
920          if (type == kMovs) {
921            return movs(cond, pc, scratch);
922          }
923          return mov(cond, pc, scratch);
924        }
925      case kCmn:
926      case kCmp:
927        if (!rn.IsPC()) {
928          UseScratchRegisterScope temps(this);
929          Register scratch = temps.Acquire();
930          HandleOutOfBoundsImmediate(cond, scratch, imm);
931          EnsureEmitFor(kMaxInstructionSizeInBytes);
932          return (this->*instruction)(cond, size, rn, scratch);
933        }
934        break;
935      case kMvn:
936      case kMvns:
937        if (IsUsingA32() || !rn.IsPC()) {
938          UseScratchRegisterScope temps(this);
939          Register scratch = temps.Acquire();
940          HandleOutOfBoundsImmediate(cond, scratch, imm);
941          EnsureEmitFor(kMaxInstructionSizeInBytes);
942          return (this->*instruction)(cond, size, rn, scratch);
943        }
944        break;
945      case kTst: {
946        UseScratchRegisterScope temps(this);
947        Register scratch = temps.Acquire();
948        HandleOutOfBoundsImmediate(cond, scratch, imm);
949        EnsureEmitFor(kMaxInstructionSizeInBytes);
950        return (this->*instruction)(cond, size, rn, scratch);
951      }
952      default:  // kSxtb, kSxth, kUxtb, kUxth
953        break;
954    }
955  }
956  Assembler::Delegate(type, instruction, cond, size, rn, operand);
957}
958
959
960void MacroAssembler::Delegate(InstructionType type,
961                              InstructionCondRROp instruction,
962                              Condition cond,
963                              Register rd,
964                              Register rn,
965                              const Operand& operand) {
966  // addw orn orns pkhbt pkhtb rsc rscs subw sxtab sxtab16 sxtah uxtab uxtab16
967  // uxtah
968  ContextScope context(this);
969  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
970    InstructionCondRROp shiftop = NULL;
971    switch (operand.GetShift().GetType()) {
972      case LSL:
973        shiftop = &Assembler::lsl;
974        break;
975      case LSR:
976        shiftop = &Assembler::lsr;
977        break;
978      case ASR:
979        shiftop = &Assembler::asr;
980        break;
981      case RRX:
982        break;
983      case ROR:
984        shiftop = &Assembler::ror;
985        break;
986      default:
987        VIXL_UNREACHABLE();
988    }
989    if (shiftop != NULL) {
990      UseScratchRegisterScope temps(this);
991      Register rm = operand.GetBaseRegister();
992      Register rs = operand.GetShiftRegister();
993      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
994      // a scratch register.
995      if (!rd.Is(rn)) temps.Include(rd);
996      if (!rm.Is(rn)) temps.Include(rm);
997      if (!rs.Is(rn)) temps.Include(rs);
998      Register scratch = temps.Acquire();
999      EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1000      (this->*shiftop)(cond, scratch, rm, rs);
1001      return (this->*instruction)(cond, rd, rn, scratch);
1002    }
1003  }
1004  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1005    // The register-shifted register case should have been handled above.
1006    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1007    UseScratchRegisterScope temps(this);
1008    Register negated_rn;
1009    if (operand.IsImmediate() || !operand.GetBaseRegister().Is(rn)) {
1010      // In this case, we can just negate `rn` instead of using a temporary
1011      // register.
1012      negated_rn = rn;
1013    } else {
1014      if (!rd.Is(rn)) temps.Include(rd);
1015      negated_rn = temps.Acquire();
1016    }
1017    EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1018    mvn(cond, negated_rn, rn);
1019    if (type == kRsc) {
1020      return adc(cond, rd, negated_rn, operand);
1021    }
1022    return adcs(cond, rd, negated_rn, operand);
1023  }
1024  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1025    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
1026    //  mvn r0, r2
1027    //  orr r0, r1, r0
1028    Register scratch;
1029    UseScratchRegisterScope temps(this);
1030    // If different from `rn`, we can make use of source and destination
1031    // registers as a scratch register.
1032    if (!rd.Is(rn)) temps.Include(rd);
1033    if (!operand.IsImmediate() && !operand.GetBaseRegister().Is(rn)) {
1034      temps.Include(operand.GetBaseRegister());
1035    }
1036    if (operand.IsRegisterShiftedRegister() &&
1037        !operand.GetShiftRegister().Is(rn)) {
1038      temps.Include(operand.GetShiftRegister());
1039    }
1040    scratch = temps.Acquire();
1041    EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1042    mvn(cond, scratch, operand);
1043    if (type == kOrns) {
1044      return orrs(cond, rd, rn, scratch);
1045    }
1046    return orr(cond, rd, rn, scratch);
1047  }
1048  if (operand.IsImmediate()) {
1049    int32_t imm = operand.GetImmediate();
1050    if (ImmediateT32::IsImmediateT32(~imm)) {
1051      if (IsUsingT32()) {
1052        switch (type) {
1053          case kOrn:
1054            return orr(cond, rd, rn, ~imm);
1055          case kOrns:
1056            return orrs(cond, rd, rn, ~imm);
1057          default:
1058            break;
1059        }
1060      }
1061    }
1062    if (imm < 0) {
1063      switch (type) {
1064        case kAddw:
1065          EnsureEmitFor(kMaxInstructionSizeInBytes);
1066          return subw(cond, rd, rn, -imm);
1067        case kSubw:
1068          EnsureEmitFor(kMaxInstructionSizeInBytes);
1069          return addw(cond, rd, rn, -imm);
1070        default:
1071          break;
1072      }
1073    }
1074    UseScratchRegisterScope temps(this);
1075    // Allow using the destination as a scratch register if possible.
1076    if (!rd.Is(rn)) temps.Include(rd);
1077    Register scratch = temps.Acquire();
1078    switch (type) {
1079      case kAddw:
1080        EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1081        mov(cond, scratch, imm);
1082        return add(cond, rd, rn, scratch);
1083      case kSubw:
1084        EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1085        mov(cond, scratch, imm);
1086        return sub(cond, rd, rn, scratch);
1087      default:
1088        break;
1089    }
1090    EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1091    mov(cond, scratch, imm);
1092    return (this->*instruction)(cond, rd, rn, scratch);
1093  }
1094  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1095}
1096
1097
1098void MacroAssembler::Delegate(InstructionType type,
1099                              InstructionCondSizeRROp instruction,
1100                              Condition cond,
1101                              EncodingSize size,
1102                              Register rd,
1103                              Register rn,
1104                              const Operand& operand) {
1105  // adc adcs add adds and_ ands asr asrs bic bics eor eors lsl lsls lsr lsrs
1106  // orr orrs ror rors rsb rsbs sbc sbcs sub subs
1107  ContextScope context(this);
1108  VIXL_ASSERT(size.IsBest());
1109  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1110    InstructionCondRROp shiftop = NULL;
1111    switch (operand.GetShift().GetType()) {
1112      case LSL:
1113        shiftop = &Assembler::lsl;
1114        break;
1115      case LSR:
1116        shiftop = &Assembler::lsr;
1117        break;
1118      case ASR:
1119        shiftop = &Assembler::asr;
1120        break;
1121      case RRX:
1122        break;
1123      case ROR:
1124        shiftop = &Assembler::ror;
1125        break;
1126      default:
1127        VIXL_UNREACHABLE();
1128    }
1129    if (shiftop != NULL) {
1130      UseScratchRegisterScope temps(this);
1131      Register rm = operand.GetBaseRegister();
1132      Register rs = operand.GetShiftRegister();
1133      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1134      // a scratch register.
1135      if (!rd.Is(rn)) temps.Include(rd);
1136      if (!rm.Is(rn)) temps.Include(rm);
1137      if (!rs.Is(rn)) temps.Include(rs);
1138      Register scratch = temps.Acquire();
1139      EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1140      (this->*shiftop)(cond, scratch, rm, rs);
1141      return (this->*instruction)(cond, size, rd, rn, scratch);
1142    }
1143  }
1144  if (operand.IsImmediate()) {
1145    int32_t imm = operand.GetImmediate();
1146    if (ImmediateT32::IsImmediateT32(~imm)) {
1147      if (IsUsingT32()) {
1148        switch (type) {
1149          case kOrr:
1150            return orn(cond, rd, rn, ~imm);
1151          case kOrrs:
1152            return orns(cond, rd, rn, ~imm);
1153          default:
1154            break;
1155        }
1156      }
1157    }
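    // A negative immediate can often be handled by switching to the
    // complementary instruction, e.g. Add(r0, r1, -4) is emitted below as
    // "sub r0, r1, #4" and Adc(r0, r1, -5) as "sbc r0, r1, #4".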
1158    if (imm < 0) {
1159      InstructionCondSizeRROp asmcb = NULL;
1160      switch (type) {
1161        case kAdd:
1162          asmcb = &Assembler::sub;
1163          imm = -imm;
1164          break;
1165        case kAdc:
1166          asmcb = &Assembler::sbc;
1167          imm = ~imm;
1168          break;
1169        case kAdds:
1170          asmcb = &Assembler::subs;
1171          imm = -imm;
1172          break;
1173        case kSub:
1174          asmcb = &Assembler::add;
1175          imm = -imm;
1176          break;
1177        case kSbc:
1178          asmcb = &Assembler::adc;
1179          imm = ~imm;
1180          break;
1181        case kSubs:
1182          asmcb = &Assembler::adds;
1183          imm = -imm;
1184          break;
1185        default:
1186          break;
1187      }
1188      if (asmcb != NULL) {
1189        EnsureEmitFor(kMaxInstructionSizeInBytes);
1190        return (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1191      }
1192    }
1193    UseScratchRegisterScope temps(this);
1194    // Allow using the destination as a scratch register if possible.
1195    if (!rd.Is(rn)) temps.Include(rd);
1196    Register scratch = temps.Acquire();
1197    EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1198    mov(cond, scratch, operand.GetImmediate());
1199    return (this->*instruction)(cond, size, rd, rn, scratch);
1200  }
1201  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1202}
1203
1204
1205void MacroAssembler::Delegate(InstructionType type,
1206                              InstructionRL instruction,
1207                              Register rn,
1208                              Label* label) {
1209  // cbz cbnz
1210  ContextScope context(this);
1211  if (IsUsingT32() && rn.IsLow()) {
1212    switch (type) {
1213      case kCbnz: {
1214        Label done;
1215        EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1216        cbz(rn, &done);
1217        b(label);
1218        Bind(&done);
1219        return;
1220      }
1221      case kCbz: {
1222        Label done;
1223        EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1224        cbnz(rn, &done);
1225        b(label);
1226        Bind(&done);
1227        return;
1228      }
1229      default:
1230        break;
1231    }
1232  } else {
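    // A32, or a high register in T32 (e.g. Cbnz(r8, &label)): cbz/cbnz is
    // not available here, so expand to a compare against zero followed by a
    // conditional branch.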
1233    switch (type) {
1234      case kCbnz:
1235        // cmp rn, #0
1236        // b.ne label
1237        EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1238        cmp(rn, 0);
1239        b(ne, label);
1240        return;
1241      case kCbz:
1242        // cmp rn, #0
1243        // b.eq label
1244        EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1245        cmp(rn, 0);
1246        b(eq, label);
1247        return;
1248      default:
1249        break;
1250    }
1251  }
1252  Assembler::Delegate(type, instruction, rn, label);
1253}
1254
1255
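// Returns true if every byte of `imm` is either 0x00 or 0xff, i.e. if `imm`
// can be emitted as a vmov.i64 byte-mask immediate (this is how the helper is
// used below).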
1256template <typename T>
1257static inline bool IsI64BitPattern(T imm) {
1258  for (T mask = static_cast<T>(0xff) << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1259    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1260  }
1261  return true;
1262}
1263
1264
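// Returns true if all the bytes of `imm` are equal to its lowest byte, i.e.
// if `imm` is a replication of a single byte and can be emitted with vmov.i8.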
1265template <typename T>
1266static inline bool IsI8BitPattern(T imm) {
1267  uint8_t imm8 = imm & 0xff;
1268  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1269    imm >>= 8;
1270    if ((imm & 0xff) != imm8) return false;
1271  }
1272  return true;
1273}
1274
1275
1276static inline bool CanBeInverted(uint32_t imm32) {
1277  uint32_t fill8 = 0;
1278
1279  if ((imm32 & 0xffffff00) == 0xffffff00) {
1280    //    11111111 11111111 11111111 abcdefgh
1281    return true;
1282  }
1283  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1284    fill8 = imm32 & 0xff;
1285    imm32 >>= 8;
1286    if ((imm32 >> 8) == 0xffff) {
1287      //    11111111 11111111 abcdefgh 00000000
1288      // or 11111111 11111111 abcdefgh 11111111
1289      return true;
1290    }
1291    if ((imm32 & 0xff) == fill8) {
1292      imm32 >>= 8;
1293      if ((imm32 >> 8) == 0xff) {
1294        //    11111111 abcdefgh 00000000 00000000
1295        // or 11111111 abcdefgh 11111111 11111111
1296        return true;
1297      }
1298      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1299        //    abcdefgh 11111111 11111111 11111111
1300        return true;
1301      }
1302    }
1303  }
1304  return false;
1305}
1306
1307
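// Replicates `imm` across the wider type RES, e.g.
// replicate<uint64_t>(uint32_t(0xff0000ff)) == UINT64_C(0xff0000ffff0000ff).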
1308template <typename RES, typename T>
1309static inline RES replicate(T imm) {
1310  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1311              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1312  RES res = imm;
1313  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1314    res = (res << (sizeof(T) * 8)) | imm;
1315  }
1316  return res;
1317}
1318
1319
1320void MacroAssembler::Delegate(InstructionType type,
1321                              InstructionCondDtSSop instruction,
1322                              Condition cond,
1323                              DataType dt,
1324                              SRegister rd,
1325                              const SOperand& operand) {
1326  ContextScope context(this);
1327  if (type == kVmov) {
1328    if (operand.IsImmediate() && dt.Is(F32)) {
1329      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1330      if (neon_imm.CanConvert<float>()) {
1331        // movw ip, imm16
1332        // movt ip, imm16
1333        // vmov s0, ip
1334        UseScratchRegisterScope temps(this);
1335        Register scratch = temps.Acquire();
1336        float f = neon_imm.GetImmediate<float>();
1337        EnsureEmitFor(2 * kMaxInstructionSizeInBytes);
1338        mov(cond, scratch, FloatToRawbits(f));
1339        return vmov(cond, rd, scratch);
1340      }
1341    }
1342  }
1343  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1344}
1345
1346
1347void MacroAssembler::Delegate(InstructionType type,
1348                              InstructionCondDtDDop instruction,
1349                              Condition cond,
1350                              DataType dt,
1351                              DRegister rd,
1352                              const DOperand& operand) {
1353  ContextScope context(this);
1354  if (type == kVmov) {
1355    if (operand.IsImmediate()) {
1356      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1357      switch (dt.GetValue()) {
1358        case I32:
1359          if (neon_imm.CanConvert<uint32_t>()) {
1360            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1361            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1362            if (IsI8BitPattern(imm)) {
1363              EnsureEmitFor(kMaxInstructionSizeInBytes);
1364              return vmov(cond, I8, rd, imm & 0xff);
1365            }
1366            // vmov.i32 d0, 0xff0000ff will translate into
1367            // vmov.i64 d0, 0xff0000ffff0000ff
1368            if (IsI64BitPattern(imm)) {
1369              EnsureEmitFor(kMaxInstructionSizeInBytes);
1370              return vmov(cond, I64, rd, replicate<uint64_t>(imm));
1371            }
1372            // vmov.i32 d0, 0xffab0000 will translate into
1373            // vmvn.i32 d0, 0x0054ffff
1374            if (cond.Is(al) && CanBeInverted(imm)) {
1375              EnsureEmitFor(kMaxInstructionSizeInBytes);
1376              return vmvn(I32, rd, ~imm);
1377            }
1378          }
1379          break;
1380        case I16:
1381          if (neon_imm.CanConvert<uint16_t>()) {
1382            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1383            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1384            if (IsI8BitPattern(imm)) {
1385              EnsureEmitFor(kMaxInstructionSizeInBytes);
1386              return vmov(cond, I8, rd, imm & 0xff);
1387            }
1388          }
1389          break;
1390        case I64:
1391          if (neon_imm.CanConvert<uint64_t>()) {
1392            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1393            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1394            if (IsI8BitPattern(imm)) {
1395              EnsureEmitFor(kMaxInstructionSizeInBytes);
1396              return vmov(cond, I8, rd, imm & 0xff);
1397            }
1398            // mov ip, lo(imm64)
1399            // vdup d0, ip
1400            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
1401            // preserved
1402            {
1403              UseScratchRegisterScope temps(this);
1404              Register scratch = temps.Acquire();
1405              EnsureEmitFor(kMaxInstructionSizeInBytes);
1406              mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1407              EnsureEmitFor(kMaxInstructionSizeInBytes);
1408              vdup(cond, Untyped32, rd, scratch);
1409            }
1410            // mov ip, hi(imm64)
1411            // vmov d0[1], ip
1412            {
1413              UseScratchRegisterScope temps(this);
1414              Register scratch = temps.Acquire();
1415              EnsureEmitFor(kMaxInstructionSizeInBytes);
1416              mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1417              EnsureEmitFor(kMaxInstructionSizeInBytes);
1418              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1419            }
1420            return;
1421          }
1422          break;
1423        default:
1424          break;
1425      }
1426      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1427          neon_imm.CanConvert<uint32_t>()) {
1428        // mov ip, imm32
1429        // vdup.8 d0, ip
1430        UseScratchRegisterScope temps(this);
1431        Register scratch = temps.Acquire();
1432        EnsureEmitFor(kMaxInstructionSizeInBytes);
1433        mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1434        DataTypeValue vdup_dt = Untyped32;
1435        switch (dt.GetValue()) {
1436          case I8:
1437            vdup_dt = Untyped8;
1438            break;
1439          case I16:
1440            vdup_dt = Untyped16;
1441            break;
1442          case I32:
1443            vdup_dt = Untyped32;
1444            break;
1445          default:
1446            VIXL_UNREACHABLE();
1447        }
1448        EnsureEmitFor(kMaxInstructionSizeInBytes);
1449        return vdup(cond, vdup_dt, rd, scratch);
1450      }
1451      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1452        float f = neon_imm.GetImmediate<float>();
1453        // Punt to vmov.i32
1454        EnsureEmitFor(kMaxInstructionSizeInBytes);
1455        return vmov(cond, I32, rd, FloatToRawbits(f));
1456      }
1457      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1458        // Punt to vmov.i64
1459        double d = neon_imm.GetImmediate<double>();
1460        EnsureEmitFor(kMaxInstructionSizeInBytes);
1461        return vmov(cond, I64, rd, DoubleToRawbits(d));
1462      }
1463    }
1464  }
1465  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1466}
1467
1468
1469void MacroAssembler::Delegate(InstructionType type,
1470                              InstructionCondDtQQop instruction,
1471                              Condition cond,
1472                              DataType dt,
1473                              QRegister rd,
1474                              const QOperand& operand) {
1475  ContextScope context(this);
1476  if (type == kVmov) {
1477    if (operand.IsImmediate()) {
1478      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1479      switch (dt.GetValue()) {
1480        case I32:
1481          if (neon_imm.CanConvert<uint32_t>()) {
1482            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1483            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1484            if (IsI8BitPattern(imm)) {
1485              EnsureEmitFor(kMaxInstructionSizeInBytes);
1486              return vmov(cond, I8, rd, imm & 0xff);
1487            }
1488            // vmov.i32 d0, 0xff0000ff will translate into
1489            // vmov.i64 d0, 0xff0000ffff0000ff
1490            if (IsI64BitPattern(imm)) {
1491              EnsureEmitFor(kMaxInstructionSizeInBytes);
1492              return vmov(cond, I64, rd, replicate<uint64_t>(imm));
1493            }
1494            // vmov.i32 d0, 0xffab0000 will translate into
1495            // vmvn.i32 d0, 0x0054ffff
1496            if (CanBeInverted(imm)) {
1497              EnsureEmitFor(kMaxInstructionSizeInBytes);
1498              return vmvn(cond, I32, rd, ~imm);
1499            }
1500          }
1501          break;
1502        case I16:
1503          if (neon_imm.CanConvert<uint16_t>()) {
1504            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1505            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1506            if (IsI8BitPattern(imm)) {
1507              EnsureEmitFor(kMaxInstructionSizeInBytes);
1508              return vmov(cond, I8, rd, imm & 0xff);
1509            }
1510          }
1511          break;
1512        case I64:
1513          if (neon_imm.CanConvert<uint64_t>()) {
1514            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1515            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1516            if (IsI8BitPattern(imm)) {
1517              EnsureEmitFor(kMaxInstructionSizeInBytes);
1518              return vmov(cond, I8, rd, imm & 0xff);
1519            }
1520            // mov ip, lo(imm64)
1521            // vdup q0, ip
1522            // vdup is preferred to 'vmov d0[0]' as d0[1-3] don't need to be
1523            // preserved
1524            {
1525              UseScratchRegisterScope temps(this);
1526              Register scratch = temps.Acquire();
1527              EnsureEmitFor(kMaxInstructionSizeInBytes);
1528              mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1529              EnsureEmitFor(kMaxInstructionSizeInBytes);
1530              vdup(cond, Untyped32, rd, scratch);
1531            }
1532            // mov ip, hi(imm64)
1533            // vmov.i32 d0[1], ip
1534            // vmov d1, d0
1535            {
1536              UseScratchRegisterScope temps(this);
1537              Register scratch = temps.Acquire();
1538              EnsureEmitFor(kMaxInstructionSizeInBytes);
1539              mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1540              EnsureEmitFor(kMaxInstructionSizeInBytes);
1541              vmov(cond,
1542                   Untyped32,
1543                   DRegisterLane(rd.GetLowDRegister(), 1),
1544                   scratch);
1545              EnsureEmitFor(kMaxInstructionSizeInBytes);
1546              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1547            }
1548            return;
1549          }
1550          break;
1551        default:
1552          break;
1553      }
1554      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1555          neon_imm.CanConvert<uint32_t>()) {
1556        // mov ip, imm32
1557        // vdup.8 d0, ip
1558        UseScratchRegisterScope temps(this);
1559        Register scratch = temps.Acquire();
1560        EnsureEmitFor(kMaxInstructionSizeInBytes);
1561        mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1562        DataTypeValue vdup_dt = Untyped32;
1563        switch (dt.GetValue()) {
1564          case I8:
1565            vdup_dt = Untyped8;
1566            break;
1567          case I16:
1568            vdup_dt = Untyped16;
1569            break;
1570          case I32:
1571            vdup_dt = Untyped32;
1572            break;
1573          default:
1574            VIXL_UNREACHABLE();
1575        }
1576        EnsureEmitFor(kMaxInstructionSizeInBytes);
1577        return vdup(cond, vdup_dt, rd, scratch);
1578      }
1579      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1580        // Punt to vmov.i32
1581        float f = neon_imm.GetImmediate<float>();
1582        EnsureEmitFor(kMaxInstructionSizeInBytes);
1583        return vmov(cond, I32, rd, FloatToRawbits(f));
1584      }
1585      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1586        // Punt to vmov.i64
1587        double d = neon_imm.GetImmediate<double>();
1588        EnsureEmitFor(kMaxInstructionSizeInBytes);
1589        return vmov(cond, I64, rd, DoubleToRawbits(d));
1590      }
1591    }
1592  }
1593  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1594}
1595
1596
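// The prefetch delegate below is reached when pld/pldw/pli is given an
// address mode the encodings cannot express directly, such as an immediate
// offset beyond the 4095-byte range. A minimal, hypothetical illustration of
// macro-assembler code taking the Offset path (masm is an assumed
// MacroAssembler pointer, not something defined in this file):
//   masm->Pld(MemOperand(r1, 0x12345));  // roughly becomes: add ip, r1, #0x12345
//                                        //                  pld [ip]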
1597void MacroAssembler::Delegate(InstructionType type,
1598                              InstructionCondMop instruction,
1599                              Condition cond,
1600                              const MemOperand& operand) {
1601  // pld pldw pli
1602  ContextScope context(this);
1603  if (operand.IsImmediate()) {
1604    const Register& rn = operand.GetBaseRegister();
1605    AddrMode addrmode = operand.GetAddrMode();
1606    int32_t offset = operand.GetOffsetImmediate();
1607    switch (addrmode) {
1608      case PreIndex:
1609        // Pre-Indexed case:
1610        // pld [r1, 12345]! will translate into
1611        //   add r1, r1, 12345
1612        //   pld [r1]
1613        EnsureEmitFor(kMaxInstructionSizeInBytes);
1614        if (operand.GetSign().IsPlus()) {
1615          add(cond, rn, rn, offset);
1616        } else {
1617          sub(cond, rn, rn, offset);
1618        }
1619        EnsureEmitFor(kMaxInstructionSizeInBytes);
1620        (this->*instruction)(cond, MemOperand(rn, Offset));
1621        return;
1622      case Offset: {
1623        UseScratchRegisterScope temps(this);
1624        Register scratch = temps.Acquire();
1625        // Offset case:
1626        // pld [r1, 12345] will translate into
1627        //   add ip, r1, 12345
1628        //   pld [ip]
1629        EnsureEmitFor(kMaxInstructionSizeInBytes);
1630        if (operand.GetSign().IsPlus()) {
1631          add(cond, scratch, rn, offset);
1632        } else {
1633          sub(cond, scratch, rn, offset);
1634        }
1635        EnsureEmitFor(kMaxInstructionSizeInBytes);
1636        (this->*instruction)(cond, MemOperand(scratch, Offset));
1637        return;
1638      }
1639      case PostIndex:
1640        // Post-indexed case:
1641        // pld [r1], imm32 will translate into
1642        //   pld [r1]
1643        //   movw ip, imm32 & 0xffff
1644        //   movt ip, imm32 >> 16
1645        //   add r1, r1, ip
1646        EnsureEmitFor(kMaxInstructionSizeInBytes);
1647        (this->*instruction)(cond, MemOperand(rn, Offset));
1648        EnsureEmitFor(kMaxInstructionSizeInBytes);
1649        if (operand.GetSign().IsPlus()) {
1650          add(cond, rn, rn, offset);
1651        } else {
1652          sub(cond, rn, rn, offset);
1653        }
1654        return;
1655    }
1656  }
1657  if (operand.IsPlainRegister()) {
1658    const Register& rn = operand.GetBaseRegister();
1659    AddrMode addrmode = operand.GetAddrMode();
1660    const Register& rm = operand.GetOffsetRegister();
1661    switch (addrmode) {
1662      case PreIndex:
1663        // Pre-Indexed case:
1664        // pld [r1, r2]! will translate into
1665        //   add r1, r1, r2
1666        //   pld [r1]
1667        EnsureEmitFor(kMaxInstructionSizeInBytes);
1668        if (operand.GetSign().IsPlus()) {
1669          add(cond, rn, rn, rm);
1670        } else {
1671          sub(cond, rn, rn, rm);
1672        }
1673        EnsureEmitFor(kMaxInstructionSizeInBytes);
1674        (this->*instruction)(cond, MemOperand(rn, Offset));
1675        return;
1676      case Offset: {
1677        UseScratchRegisterScope temps(this);
1678        Register scratch = temps.Acquire();
1679        // Offset case:
1680        // pld [r1, r2] will translate into
1681        //   add ip, r1, r2
1682        //   pld [ip]
1683        EnsureEmitFor(kMaxInstructionSizeInBytes);
1684        if (operand.GetSign().IsPlus()) {
1685          add(cond, scratch, rn, rm);
1686        } else {
1687          sub(cond, scratch, rn, rm);
1688        }
1689        EnsureEmitFor(kMaxInstructionSizeInBytes);
1690        (this->*instruction)(cond, MemOperand(scratch, Offset));
1691        return;
1692      }
1693      case PostIndex:
1694        // Post-indexed case:
1695        // pld [r1], r2 will translate into
1696        //   pld [r1]
1697        //   add r1, r1, r2
1698        EnsureEmitFor(kMaxInstructionSizeInBytes);
1699        (this->*instruction)(cond, MemOperand(rn, Offset));
1700        if (operand.GetSign().IsPlus()) {
1701          add(cond, rn, rn, rm);
1702        } else {
1703          sub(cond, rn, rn, rm);
1704        }
1705        return;
1706    }
1707  }
1708  Assembler::Delegate(type, instruction, cond, operand);
1709}
1710
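// In the delegate below, the destination may join the scratch pool for the
// Offset case, but only for loads and only when rd is not also the base
// register: for the store variants (stl, stlb, stlh) rd holds the data to be
// stored, so using it for the computed address would corrupt that value.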
1711void MacroAssembler::Delegate(InstructionType type,
1712                              InstructionCondRMop instruction,
1713                              Condition cond,
1714                              Register rd,
1715                              const MemOperand& operand) {
1716  // lda ldab ldaex ldaexb ldaexh ldah ldrbt ldrex ldrexb ldrexh ldrht ldrsbt
1717  // ldrsht ldrt stl stlb stlh strbt strht strt
1718  ContextScope context(this);
1719  if (operand.IsImmediate()) {
1720    const Register& rn = operand.GetBaseRegister();
1721    AddrMode addrmode = operand.GetAddrMode();
1722    int32_t offset = operand.GetOffsetImmediate();
1723    switch (addrmode) {
1724      case PreIndex:
1725        // Pre-Indexed case:
1726        // lda r0, [r1, 12345]! will translate into
1727        //   add r1, r1, 12345
1728        //   lda r0, [r1]
1729        EnsureEmitFor(kMaxInstructionSizeInBytes);
1730        if (operand.GetSign().IsPlus()) {
1731          add(cond, rn, rn, offset);
1732        } else {
1733          sub(cond, rn, rn, offset);
1734        }
1735        EnsureEmitFor(kMaxInstructionSizeInBytes);
1736        (this->*instruction)(cond, rd, MemOperand(rn, Offset));
1737        return;
1738      case Offset: {
1739        UseScratchRegisterScope temps(this);
1740        // Allow using the destination as a scratch register if possible.
1741        if ((type != kStl) && (type != kStlb) && (type != kStlh) &&
1742            !rd.Is(rn)) {
1743          temps.Include(rd);
1744        }
1745        Register scratch = temps.Acquire();
1746        // Offset case:
1747        // lda r0, [r1, 12345] will translate into
1748        //   add r0, r1, 12345
1749        //   lda r0, [r0]
1750        EnsureEmitFor(kMaxInstructionSizeInBytes);
1751        if (operand.GetSign().IsPlus()) {
1752          add(cond, scratch, rn, offset);
1753        } else {
1754          sub(cond, scratch, rn, offset);
1755        }
1756        EnsureEmitFor(kMaxInstructionSizeInBytes);
1757        (this->*instruction)(cond, rd, MemOperand(scratch, Offset));
1758        return;
1759      }
1760      case PostIndex:
1761        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1762        if (!rn.Is(rd)) {
1763          // Post-indexed case:
1764          // lda r0, [r1], imm32 will translate into
1765          //   lda r0, [r1]
1766          //   movw ip, imm32 & 0xffff
1767          //   movt ip, imm32 >> 16
1768          //   add r1, r1, ip
1769          EnsureEmitFor(kMaxInstructionSizeInBytes);
1770          (this->*instruction)(cond, rd, MemOperand(rn, Offset));
1771          EnsureEmitFor(kMaxInstructionSizeInBytes);
1772          if (operand.GetSign().IsPlus()) {
1773            add(cond, rn, rn, offset);
1774          } else {
1775            sub(cond, rn, rn, offset);
1776          }
1777          return;
1778        }
1779        break;
1780    }
1781  }
1782  if (operand.IsPlainRegister()) {
1783    const Register& rn = operand.GetBaseRegister();
1784    AddrMode addrmode = operand.GetAddrMode();
1785    const Register& rm = operand.GetOffsetRegister();
1786    switch (addrmode) {
1787      case PreIndex:
1788        // Pre-Indexed case:
1789        // lda r0, [r1, r2]! will translate into
1790        //   add r1, r1, r2
1791        //   lda r0, [r1]
1792        EnsureEmitFor(kMaxInstructionSizeInBytes);
1793        if (operand.GetSign().IsPlus()) {
1794          add(cond, rn, rn, rm);
1795        } else {
1796          sub(cond, rn, rn, rm);
1797        }
1798        EnsureEmitFor(kMaxInstructionSizeInBytes);
1799        (this->*instruction)(cond, rd, MemOperand(rn, Offset));
1800        return;
1801      case Offset: {
1802        UseScratchRegisterScope temps(this);
1803        // Allow using the destination as a scratch register if possible.
1804        if ((type != kStl) && (type != kStlb) && (type != kStlh) &&
1805            !rd.Is(rn)) {
1806          temps.Include(rd);
1807        }
1808        Register scratch = temps.Acquire();
1809        // Offset case:
1810        // lda r0, [r1, r2] will translate into
1811        //   add r0, r1, r2
1812        //   lda r0, [r0]
1813        EnsureEmitFor(kMaxInstructionSizeInBytes);
1814        if (operand.GetSign().IsPlus()) {
1815          add(cond, scratch, rn, rm);
1816        } else {
1817          sub(cond, scratch, rn, rm);
1818        }
1819        EnsureEmitFor(kMaxInstructionSizeInBytes);
1820        (this->*instruction)(cond, rd, MemOperand(scratch, Offset));
1821        return;
1822      }
1823      case PostIndex:
1824        // Avoid the unpredictable case 'lda r0, [r0], r1'
1825        if (!rn.Is(rd)) {
1826          // Post-indexed case:
1827          // lda r0, [r1], r2 will translate into
1828          //   lda r0, [r1]
1829          //   add r1, r1, r2
1830          EnsureEmitFor(kMaxInstructionSizeInBytes);
1831          (this->*instruction)(cond, rd, MemOperand(rn, Offset));
1832          EnsureEmitFor(kMaxInstructionSizeInBytes);
1833          if (operand.GetSign().IsPlus()) {
1834            add(cond, rn, rn, rm);
1835          } else {
1836            sub(cond, rn, rn, rm);
1837          }
1838          return;
1839        }
1840        break;
1841    }
1842  }
1843  Assembler::Delegate(type, instruction, cond, rd, operand);
1844}
1845
1846
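// The ldr/str family below follows the same pattern. A hypothetical example
// (illustrative only): for a load whose offset does not fit the immediate
// encoding, the destination itself can serve as the address scratch register,
// so no extra register is consumed:
//   masm->Ldr(r0, MemOperand(r1, 0x12345));  // roughly becomes: add r0, r1, #0x12345
//                                            //                  ldr r0, [r0]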
1847void MacroAssembler::Delegate(InstructionType type,
1848                              InstructionCondSizeRMop instruction,
1849                              Condition cond,
1850                              EncodingSize size,
1851                              Register rd,
1852                              const MemOperand& operand) {
1853  // ldr ldrb ldrh ldrsb ldrsh str strb strh
1854  ContextScope context(this);
1855  VIXL_ASSERT(size.IsBest());
1856  if (operand.IsImmediate()) {
1857    const Register& rn = operand.GetBaseRegister();
1858    AddrMode addrmode = operand.GetAddrMode();
1859    int32_t offset = operand.GetOffsetImmediate();
1860    switch (addrmode) {
1861      case PreIndex:
1862        // Pre-Indexed case:
1863        // ldr r0, [r1, 12345]! will translate into
1864        //   add r1, r1, 12345
1865        //   ldr r0, [r1]
1866        EnsureEmitFor(kMaxInstructionSizeInBytes);
1867        if (operand.GetSign().IsPlus()) {
1868          add(cond, rn, rn, offset);
1869        } else {
1870          sub(cond, rn, rn, offset);
1871        }
1872        EnsureEmitFor(kMaxInstructionSizeInBytes);
1873        (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1874        return;
1875      case Offset: {
1876        UseScratchRegisterScope temps(this);
1877        // Allow using the destination as a scratch register if possible.
1878        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1879            !rd.Is(rn)) {
1880          temps.Include(rd);
1881        }
1882        Register scratch = temps.Acquire();
1883        // Offset case:
1884        // ldr r0, [r1, 12345] will translate into
1885        //   add r0, r1, 12345
1886        //   ldr r0, [r0]
1887        EnsureEmitFor(kMaxInstructionSizeInBytes);
1888        if (operand.GetSign().IsPlus()) {
1889          add(cond, scratch, rn, offset);
1890        } else {
1891          sub(cond, scratch, rn, offset);
1892        }
1893        EnsureEmitFor(kMaxInstructionSizeInBytes);
1894        (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1895        return;
1896      }
1897      case PostIndex:
1898        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1899        if (!rn.Is(rd)) {
1900          // Post-indexed case:
1901          // ldr r0, [r1], imm32 will translate into
1902          //   ldr r0, [r1]
1903          //   movw ip, imm32 & 0xffff
1904          //   movt ip, imm32 >> 16
1905          //   add r1, r1, ip
1906          EnsureEmitFor(kMaxInstructionSizeInBytes);
1907          (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1908          EnsureEmitFor(kMaxInstructionSizeInBytes);
1909          if (operand.GetSign().IsPlus()) {
1910            add(cond, rn, rn, offset);
1911          } else {
1912            sub(cond, rn, rn, offset);
1913          }
1914          return;
1915        }
1916        break;
1917    }
1918  }
1919  if (operand.IsPlainRegister()) {
1920    const Register& rn = operand.GetBaseRegister();
1921    AddrMode addrmode = operand.GetAddrMode();
1922    const Register& rm = operand.GetOffsetRegister();
1923    switch (addrmode) {
1924      case PreIndex:
1925        // Pre-Indexed case:
1926        // ldr r0, [r1, r2]! will translate into
1927        //   add r1, r1, r2
1928        //   ldr r0, [r1]
1929        EnsureEmitFor(kMaxInstructionSizeInBytes);
1930        if (operand.GetSign().IsPlus()) {
1931          add(cond, rn, rn, rm);
1932        } else {
1933          sub(cond, rn, rn, rm);
1934        }
1935        EnsureEmitFor(kMaxInstructionSizeInBytes);
1936        (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1937        return;
1938      case Offset: {
1939        UseScratchRegisterScope temps(this);
1940        // Allow using the destination as a scratch register if possible.
1941        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1942            !rd.Is(rn)) {
1943          temps.Include(rd);
1944        }
1945        Register scratch = temps.Acquire();
1946        // Offset case:
1947        // ldr r0, [r1, r2] will translate into
1948        //   add r0, r1, r2
1949        //   ldr r0, [r0]
1950        EnsureEmitFor(kMaxInstructionSizeInBytes);
1951        if (operand.GetSign().IsPlus()) {
1952          add(cond, scratch, rn, rm);
1953        } else {
1954          sub(cond, scratch, rn, rm);
1955        }
1956        EnsureEmitFor(kMaxInstructionSizeInBytes);
1957        (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1958        return;
1959      }
1960      case PostIndex:
1961        // Avoid the unpredictable case 'ldr r0, [r0], r1'
1962        if (!rn.Is(rd)) {
1963          // Post-indexed case:
1964          // ldr r0, [r1], r2 will translate into
1965          //   ldr r0, [r1]
1966          //   add r1, r1, r2
1967          EnsureEmitFor(kMaxInstructionSizeInBytes);
1968          (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1969          EnsureEmitFor(kMaxInstructionSizeInBytes);
1970          if (operand.GetSign().IsPlus()) {
1971            add(cond, rn, rn, rm);
1972          } else {
1973            sub(cond, rn, rn, rm);
1974          }
1975          return;
1976        }
1977        break;
1978    }
1979  }
1980  Assembler::Delegate(type, instruction, cond, size, rd, operand);
1981}
1982
1983
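// ldrd/strd and the doubleword exclusive loads carry an extra constraint in
// A32: the encoding only names rt, with rt2 implied as the next register, so
// rt must be even-numbered and must not be lr (rt2 would be pc). The
// can_delegate check below mirrors that rule; T32 encodes rt and rt2
// independently, so it is exempt.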
1984void MacroAssembler::Delegate(InstructionType type,
1985                              InstructionCondRRMop instruction,
1986                              Condition cond,
1987                              Register rt,
1988                              Register rt2,
1989                              const MemOperand& operand) {
1990  // ldaexd, ldrd, ldrexd, stlex, stlexb, stlexh, strd, strex, strexb, strexh
1991  ContextScope context(this);
1992
1993  bool can_delegate = true;
1994  if (((type == kLdrd) || (type == kStrd) || (type == kLdaexd) ||
1995       (type == kLdrexd)) &&
1996      IsUsingA32()) {
1997    can_delegate =
1998        (((rt.GetCode() & 1) == 0) && !rt.Is(lr) &&
1999         (((rt.GetCode() + 1) % kNumberOfRegisters) == rt2.GetCode()));
2000  }
2001
2002  if (can_delegate) {
2003    if (operand.IsImmediate()) {
2004      const Register& rn = operand.GetBaseRegister();
2005      AddrMode addrmode = operand.GetAddrMode();
2006      int32_t offset = operand.GetOffsetImmediate();
2007      switch (addrmode) {
2008        case PreIndex:
2009          // Pre-Indexed case:
2010          // ldrd r0, r1, [r2, 12345]! will translate into
2011          //   add r2, 12345
2012          //   ldrd r0, r1, [r2]
2013          EnsureEmitFor(kMaxInstructionSizeInBytes);
2014          if (operand.GetSign().IsPlus()) {
2015            add(cond, rn, rn, offset);
2016          } else {
2017            sub(cond, rn, rn, offset);
2018          }
2019          EnsureEmitFor(kMaxInstructionSizeInBytes);
2020          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2021          return;
2022        case Offset: {
2023          UseScratchRegisterScope temps(this);
2024          // Allow using the destinations as scratch registers if possible.
2025          if ((type != kStlex) && (type != kStlexb) && (type != kStlexh) &&
2026              (type != kStrd) && (type != kStrex) && (type != kStrexb) &&
2027              (type != kStrexh)) {
2028            if (!rt.Is(rn)) {
2029              temps.Include(rt);
2030            }
2031            if (!rt2.Is(rn)) {
2032              temps.Include(rt2);
2033            }
2034          }
2035          Register scratch = temps.Acquire();
2036          // Offset case:
2037          // ldrd r0, r1, [r2, 12345] will translate into
2038          //   add r0, r2, 12345
2039          //   ldrd r0, r1, [r0]
2040          EnsureEmitFor(kMaxInstructionSizeInBytes);
2041          if (operand.GetSign().IsPlus()) {
2042            add(cond, scratch, rn, offset);
2043          } else {
2044            sub(cond, scratch, rn, offset);
2045          }
2046          EnsureEmitFor(kMaxInstructionSizeInBytes);
2047          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2048          return;
2049        }
2050        case PostIndex:
2051          // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2052          if (!rn.Is(rt) && !rn.Is(rt2)) {
2053            // Post-indexed case:
2054            // ldrd r0, r1, [r2], imm32 will translate into
2055            //   ldrd r0, r1, [r2]
2056            //   movw ip, imm32 & 0xffff
2057            //   movt ip, imm32 >> 16
2058            //   add r2, ip
2059            EnsureEmitFor(kMaxInstructionSizeInBytes);
2060            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2061            EnsureEmitFor(kMaxInstructionSizeInBytes);
2062            if (operand.GetSign().IsPlus()) {
2063              add(cond, rn, rn, offset);
2064            } else {
2065              sub(cond, rn, rn, offset);
2066            }
2067            return;
2068          }
2069          break;
2070      }
2071    }
2072    if (operand.IsPlainRegister()) {
2073      const Register& rn = operand.GetBaseRegister();
2074      const Register& rm = operand.GetOffsetRegister();
2075      AddrMode addrmode = operand.GetAddrMode();
2076      switch (addrmode) {
2077        case PreIndex:
2078          // ldrd r0, r1, [r2, r3]! will translate into
2079          //   add r2, r3
2080          //   ldrd r0, r1, [r2]
2081          EnsureEmitFor(kMaxInstructionSizeInBytes);
2082          if (operand.GetSign().IsPlus()) {
2083            add(cond, rn, rn, rm);
2084          } else {
2085            sub(cond, rn, rn, rm);
2086          }
2087          EnsureEmitFor(kMaxInstructionSizeInBytes);
2088          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2089          return;
2090        case PostIndex:
2091          // ldrd r0, r1, [r2], r3 will translate into
2092          //   ldrd r0, r1, [r2]
2093          //   add r2, r3
2094          EnsureEmitFor(kMaxInstructionSizeInBytes);
2095          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2096          EnsureEmitFor(kMaxInstructionSizeInBytes);
2097          if (operand.GetSign().IsPlus()) {
2098            add(cond, rn, rn, rm);
2099          } else {
2100            sub(cond, rn, rn, rm);
2101          }
2102          return;
2103        case Offset: {
2104          UseScratchRegisterScope temps(this);
2105          // Allow using the destinations as scratch registers if possible.
2106          if ((type != kStlex) && (type != kStlexb) && (type != kStlexh) &&
2107              (type != kStrd) && (type != kStrex) && (type != kStrexb) &&
2108              (type != kStrexh)) {
2109            if (!rt.Is(rn)) {
2110              temps.Include(rt);
2111            }
2112            if (!rt2.Is(rn)) {
2113              temps.Include(rt2);
2114            }
2115          }
2116          Register scratch = temps.Acquire();
2117          // Offset case:
2118          // ldrd r0, r1, [r2, r3] will translate into
2119          //   add r0, r2, r3
2120          //   ldrd r0, r1, [r0]
2121          EnsureEmitFor(kMaxInstructionSizeInBytes);
2122          if (operand.GetSign().IsPlus()) {
2123            add(cond, scratch, rn, rm);
2124          } else {
2125            sub(cond, scratch, rn, rm);
2126          }
2127          EnsureEmitFor(kMaxInstructionSizeInBytes);
2128          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2129          return;
2130        }
2131      }
2132    }
2133  }
2134  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2135}
2136
2137
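// vldr/vstr only accept an immediate offset that is a multiple of 4 in the
// range [-1020, 1020] (an 8-bit value scaled by 4) and have no writeback
// forms, so out-of-range offsets and all pre/post-indexed modes fall back to
// the explicit address computation below.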
2138void MacroAssembler::Delegate(InstructionType type,
2139                              InstructionCondDtSMop instruction,
2140                              Condition cond,
2141                              DataType dt,
2142                              SRegister rd,
2143                              const MemOperand& operand) {
2144  // vldr.32 vstr.32
2145  ContextScope context(this);
2146  if (operand.IsImmediate()) {
2147    const Register& rn = operand.GetBaseRegister();
2148    AddrMode addrmode = operand.GetAddrMode();
2149    int32_t offset = operand.GetOffsetImmediate();
2150    switch (addrmode) {
2151      case PreIndex:
2152        // Pre-Indexed case:
2153        // vldr.32 s0, [r1, 12345]! will translate into
2154        //   add r1, 12345
2155        //   vldr.32 s0, [r1]
2156        EnsureEmitFor(kMaxInstructionSizeInBytes);
2157        if (operand.GetSign().IsPlus()) {
2158          add(cond, rn, rn, offset);
2159        } else {
2160          sub(cond, rn, rn, offset);
2161        }
2162        EnsureEmitFor(kMaxInstructionSizeInBytes);
2163        (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2164        return;
2165      case Offset: {
2166        UseScratchRegisterScope temps(this);
2167        Register scratch = temps.Acquire();
2168        // Offset case:
2169        // vldr.32 s0, [r1, 12345] will translate into
2170        //   add ip, r1, 12345
2171        //   vldr.32 s0, [ip]
2172        EnsureEmitFor(kMaxInstructionSizeInBytes);
2173        if (operand.GetSign().IsPlus()) {
2174          add(cond, scratch, rn, offset);
2175        } else {
2176          sub(cond, scratch, rn, offset);
2177        }
2178        EnsureEmitFor(kMaxInstructionSizeInBytes);
2179        (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2180        return;
2181      }
2182      case PostIndex:
2183        // Post-indexed case:
2184        // vldr.32 s0, [r1], imm32 will translate into
2185        //   vldr.32 s0, [r1]
2186        //   movw ip, imm32 & 0xffff
2187        //   movt ip, imm32 >> 16
2188        //   add r1, ip
2189        EnsureEmitFor(kMaxInstructionSizeInBytes);
2190        (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2191        EnsureEmitFor(kMaxInstructionSizeInBytes);
2192        if (operand.GetSign().IsPlus()) {
2193          add(cond, rn, rn, offset);
2194        } else {
2195          sub(cond, rn, rn, offset);
2196        }
2197        return;
2198    }
2199  }
2200  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2201}
2202
2203
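// stlexd/strexd: as with ldrd/strd above, the A32 encodings imply rt2 = rt + 1
// with rt even and not lr, whereas T32 names both transfer registers
// explicitly; that is the encoding check guarding the rewrite below.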
2204void MacroAssembler::Delegate(InstructionType type,
2205                              InstructionCondRRRMop instruction,
2206                              Condition cond,
2207                              Register rd,
2208                              Register rt,
2209                              Register rt2,
2210                              const MemOperand& operand) {
2211  // stlexd strexd
2212  ContextScope context(this);
2213  if (IsUsingT32() ||
2214      (((rt.GetCode() & 1) == 0) && !rt.Is(lr) &&
2215       (((rt.GetCode() + 1) % kNumberOfRegisters) == rt2.GetCode()))) {
2216    if (operand.IsImmediate()) {
2217      const Register& rn = operand.GetBaseRegister();
2218      AddrMode addrmode = operand.GetAddrMode();
2219      int32_t offset = operand.GetOffsetImmediate();
2220      switch (addrmode) {
2221        case PreIndex:
2222          // Pre-Indexed case:
2223          // strexd r5, r0, r1, [r2, 12345]! will translate into
2224          //   add r2, 12345
2225          //   strexd r5, r0, r1, [r2]
2226          EnsureEmitFor(kMaxInstructionSizeInBytes);
2227          if (operand.GetSign().IsPlus()) {
2228            add(cond, rn, rn, offset);
2229          } else {
2230            sub(cond, rn, rn, offset);
2231          }
2232          EnsureEmitFor(kMaxInstructionSizeInBytes);
2233          (this->*instruction)(cond, rd, rt, rt2, MemOperand(rn, Offset));
2234          return;
2235        case Offset: {
2236          UseScratchRegisterScope temps(this);
2237          // Allow using the destination as a scratch register if possible.
2238          if (!rd.Is(rn) && !rd.Is(rt) && !rd.Is(rt2)) temps.Include(rd);
2239          Register scratch = temps.Acquire();
2240          // Offset case:
2241          // strexd r5, r0, r1, [r2, 12345] will translate into
2242          //   add r5, r2, 12345
2243          //   strexd r5, r0, r1, [r5]
2244          EnsureEmitFor(kMaxInstructionSizeInBytes);
2245          if (operand.GetSign().IsPlus()) {
2246            add(cond, scratch, rn, offset);
2247          } else {
2248            sub(cond, scratch, rn, offset);
2249          }
2250          EnsureEmitFor(kMaxInstructionSizeInBytes);
2251          (this->*instruction)(cond, rd, rt, rt2, MemOperand(scratch, Offset));
2252          return;
2253        }
2254        case PostIndex:
2255          // Avoid the unpredictable case 'strexd r5, r0, r1, [r0], imm'
2256          if (!rn.Is(rt) && !rn.Is(rt2)) {
2257            // Post-indexed case:
2258            // strexd r5, r0, r1, [r2], imm32 will translate into
2259            //   strexd r5, r0, r1, [r2]
2260            //   movw ip, imm32 & 0xffff
2261            //   movt ip, imm32 >> 16
2262            //   add r2, ip
2263            EnsureEmitFor(kMaxInstructionSizeInBytes);
2264            (this->*instruction)(cond, rd, rt, rt2, MemOperand(rn, Offset));
2265            EnsureEmitFor(kMaxInstructionSizeInBytes);
2266            if (operand.GetSign().IsPlus()) {
2267              add(cond, rn, rn, offset);
2268            } else {
2269              sub(cond, rn, rn, offset);
2270            }
2271            return;
2272          }
2273          break;
2274      }
2275    }
2276  }
2277  Assembler::Delegate(type, instruction, cond, rd, rt, rt2, operand);
2278}
2279
2280
2281void MacroAssembler::Delegate(InstructionType type,
2282                              InstructionCondDtDMop instruction,
2283                              Condition cond,
2284                              DataType dt,
2285                              DRegister rd,
2286                              const MemOperand& operand) {
2287  // vldr.64 vstr.64
2288  ContextScope context(this);
2289  if (operand.IsImmediate()) {
2290    const Register& rn = operand.GetBaseRegister();
2291    AddrMode addrmode = operand.GetAddrMode();
2292    int32_t offset = operand.GetOffsetImmediate();
2293    switch (addrmode) {
2294      case PreIndex:
2295        // Pre-Indexed case:
2296        // vldr.64 d0, [r1, 12345]! will translate into
2297        //   add r1, 12345
2298        //   vldr.64 d0, [r1]
2299        EnsureEmitFor(kMaxInstructionSizeInBytes);
2300        if (operand.GetSign().IsPlus()) {
2301          add(cond, rn, rn, offset);
2302        } else {
2303          sub(cond, rn, rn, offset);
2304        }
2305        EnsureEmitFor(kMaxInstructionSizeInBytes);
2306        (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2307        return;
2308      case Offset: {
2309        UseScratchRegisterScope temps(this);
2310        Register scratch = temps.Acquire();
2311        // Offset case:
2312        // vldr.64 d0, [r1, 12345] will translate into
2313        //   add ip, r1, 12345
2314        //   vldr.64 d0, [ip]
2315        EnsureEmitFor(kMaxInstructionSizeInBytes);
2316        if (operand.GetSign().IsPlus()) {
2317          add(cond, scratch, rn, offset);
2318        } else {
2319          sub(cond, scratch, rn, offset);
2320        }
2321        EnsureEmitFor(kMaxInstructionSizeInBytes);
2322        (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2323        return;
2324      }
2325      case PostIndex:
2326        // Post-indexed case:
2327        // vldr.64 d0, [r1], imm32 will translate into
2328        //   vldr.64 d0, [r1]
2329        //   movw ip, imm32 & 0xffff
2330        //   movt ip, imm32 >> 16
2331        //   add r1, ip
2332        EnsureEmitFor(kMaxInstructionSizeInBytes);
2333        (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2334        EnsureEmitFor(kMaxInstructionSizeInBytes);
2335        if (operand.GetSign().IsPlus()) {
2336          add(cond, rn, rn, offset);
2337        } else {
2338          sub(cond, rn, rn, offset);
2339        }
2340        return;
2341    }
2342  }
2343  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2344}
2345
2346
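// vld3/vst3 are only rewritten for the forms handled here: a list of exactly
// three registers, an 8-, 16- or 32-bit element size, and a base register
// other than pc. Anything else goes straight to the assembler delegate.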
2347void MacroAssembler::Delegate(InstructionType type,
2348                              InstructionCondDtNrlMop instruction,
2349                              Condition cond,
2350                              DataType dt,
2351                              const NeonRegisterList& nreglist,
2352                              const MemOperand& operand) {
2353  // vld3 vst3
2354  ContextScope context(this);
2355  const Register& rn = operand.GetBaseRegister();
2356
2357  bool can_delegate = !rn.Is(pc) && (nreglist.GetLength() == 3) &&
2358                      (dt.Is(Untyped8) || dt.Is(Untyped16) || dt.Is(Untyped32));
2359
2360  if (can_delegate) {
2361    if (operand.IsImmediate()) {
2362      AddrMode addrmode = operand.GetAddrMode();
2363      int32_t offset = operand.GetOffsetImmediate();
2364      switch (addrmode) {
2365        case PreIndex:
2366          // Pre-Indexed case:
2367          // vld3.8 {d0-d2}, [r1, 12345]! will translate into
2368          //   add r1, 12345
2369          //   vld3.8 {d0-d2}, [r1]
2370          EnsureEmitFor(kMaxInstructionSizeInBytes);
2371          if (operand.GetSign().IsPlus()) {
2372            add(cond, rn, rn, offset);
2373          } else {
2374            sub(cond, rn, rn, offset);
2375          }
2376          EnsureEmitFor(kMaxInstructionSizeInBytes);
2377          (this->*instruction)(cond, dt, nreglist, MemOperand(rn, Offset));
2378          return;
2379        case Offset: {
2380          UseScratchRegisterScope temps(this);
2381          Register scratch = temps.Acquire();
2382          // Offset case:
2383          // vld3.16 {d0-d2}, [r1, 12345] will translate into
2384          //   add ip, r1, 12345
2385          //   vld3.16 {d0-d2}, [ip]
2386          EnsureEmitFor(kMaxInstructionSizeInBytes);
2387          if (operand.GetSign().IsPlus()) {
2388            add(cond, scratch, rn, offset);
2389          } else {
2390            sub(cond, scratch, rn, offset);
2391          }
2392          EnsureEmitFor(kMaxInstructionSizeInBytes);
2393          (this->*instruction)(cond, dt, nreglist, MemOperand(scratch, Offset));
2394          return;
2395        }
2396        case PostIndex:
2397          // Post-indexed case:
2398          // vld3.32 {d0-d2}, [r1], imm32 will translate into
2399          //   vld3.32 {d0-d2}, [r1]
2400          //   movw ip, imm32 & 0xffff
2401          //   movt ip, imm32 >> 16
2402          //   add r1, ip
2403          EnsureEmitFor(kMaxInstructionSizeInBytes);
2404          (this->*instruction)(cond, dt, nreglist, MemOperand(rn, Offset));
2405          EnsureEmitFor(kMaxInstructionSizeInBytes);
2406          if (operand.GetSign().IsPlus()) {
2407            add(cond, rn, rn, offset);
2408          } else {
2409            sub(cond, rn, rn, offset);
2410          }
2411          return;
2412      }
2413    }
2414  }
2415  Assembler::Delegate(type, instruction, cond, dt, nreglist, operand);
2416}
2417
2418
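// msr: T32 has no immediate form and the A32 immediate form only accepts a
// modified immediate, so any operand the assembler rejects is first moved
// into a scratch register and written back with the register form. A
// hypothetical example (illustrative only):
//   masm->Msr(APSR_nzcvq, 0x1234);  // roughly becomes: mov ip, #0x1234
//                                   //                  msr APSR_nzcvq, ip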
2419void MacroAssembler::Delegate(InstructionType type,
2420                              InstructionCondMsrOp /*instruction*/,
2421                              Condition cond,
2422                              MaskedSpecialRegister spec_reg,
2423                              const Operand& operand) {
2424  USE(type);
2425  VIXL_ASSERT(type == kMsr);
2426  UseScratchRegisterScope temps(this);
2427  Register scratch = temps.Acquire();
2428  EnsureEmitFor(kMaxInstructionSizeInBytes);
2429  mov(cond, scratch, operand);
2430  EnsureEmitFor(kMaxInstructionSizeInBytes);
2431  msr(cond, spec_reg, scratch);
2432}
2433
2434// Start of generated code.
2435// End of generated code.
2436}  // namespace aarch32
2437}  // namespace vixl
2438