macro-assembler-aarch32.cc revision 740da998f1b2677636dfd76a6028e283d6175bf0
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT((available_ == NULL) && (available_vfp_ == NULL));
41  available_ = masm->GetScratchRegisterList();
42  old_available_ = available_->GetList();
43  available_vfp_ = masm->GetScratchVRegisterList();
44  old_available_vfp_ = available_vfp_->GetList();
45}
46
47
48void UseScratchRegisterScope::Close() {
49  if (available_ != NULL) {
50    available_->SetList(old_available_);
51    available_ = NULL;
52  }
53  if (available_vfp_ != NULL) {
54    available_vfp_->SetList(old_available_vfp_);
55    available_vfp_ = NULL;
56  }
57}
58
59
60bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
61  VIXL_ASSERT(available_ != NULL);
62  VIXL_ASSERT(reg.IsValid());
63  return available_->Includes(reg);
64}
65
66
67bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
68  VIXL_ASSERT(available_vfp_ != NULL);
69  VIXL_ASSERT(reg.IsValid());
70  return available_vfp_->IncludesAllOf(reg);
71}
72
73
74Register UseScratchRegisterScope::Acquire() {
75  VIXL_ASSERT(available_ != NULL);
76  VIXL_CHECK(!available_->IsEmpty());
77  Register reg = available_->GetFirstAvailableRegister();
78  available_->Remove(reg);
79  return reg;
80}
81
82
83VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
84  switch (size_in_bits) {
85    case kSRegSizeInBits:
86      return AcquireS();
87    case kDRegSizeInBits:
88      return AcquireD();
89    case kQRegSizeInBits:
90      return AcquireQ();
91    default:
92      VIXL_UNREACHABLE();
93      return NoVReg;
94  }
95}
96
97
98QRegister UseScratchRegisterScope::AcquireQ() {
99  VIXL_ASSERT(available_vfp_ != NULL);
100  VIXL_CHECK(!available_vfp_->IsEmpty());
101  QRegister reg = available_vfp_->GetFirstAvailableQRegister();
102  available_vfp_->Remove(reg);
103  return reg;
104}
105
106
107DRegister UseScratchRegisterScope::AcquireD() {
108  VIXL_ASSERT(available_vfp_ != NULL);
109  VIXL_CHECK(!available_vfp_->IsEmpty());
110  DRegister reg = available_vfp_->GetFirstAvailableDRegister();
111  available_vfp_->Remove(reg);
112  return reg;
113}
114
115
116SRegister UseScratchRegisterScope::AcquireS() {
117  VIXL_ASSERT(available_vfp_ != NULL);
118  VIXL_CHECK(!available_vfp_->IsEmpty());
119  SRegister reg = available_vfp_->GetFirstAvailableSRegister();
120  available_vfp_->Remove(reg);
121  return reg;
122}
123
124
125void UseScratchRegisterScope::Release(const Register& reg) {
126  VIXL_ASSERT(available_ != NULL);
127  VIXL_ASSERT(reg.IsValid());
128  VIXL_ASSERT(!available_->Includes(reg));
129  available_->Combine(reg);
130}
131
132
133void UseScratchRegisterScope::Release(const VRegister& reg) {
134  VIXL_ASSERT(available_vfp_ != NULL);
135  VIXL_ASSERT(reg.IsValid());
136  VIXL_ASSERT(!available_vfp_->IncludesAliasOf(reg));
137  available_vfp_->Combine(reg);
138}
139
140
141void UseScratchRegisterScope::Include(const RegisterList& list) {
142  VIXL_ASSERT(available_ != NULL);
143  RegisterList excluded_registers(sp, lr, pc);
144  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
145  available_->SetList(available_->GetList() | mask);
146}
147
148
149void UseScratchRegisterScope::Include(const VRegisterList& list) {
150  VIXL_ASSERT(available_vfp_ != NULL);
151  available_vfp_->SetList(available_vfp_->GetList() | list.GetList());
152}
153
154
155void UseScratchRegisterScope::Exclude(const RegisterList& list) {
156  VIXL_ASSERT(available_ != NULL);
157  available_->SetList(available_->GetList() & ~list.GetList());
158}
159
160
161void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
162  VIXL_ASSERT(available_vfp_ != NULL);
163  available_vfp_->SetList(available_vfp_->GetList() & ~list.GetList());
164}
165
166
167void UseScratchRegisterScope::ExcludeAll() {
168  if (available_ != NULL) {
169    available_->SetList(0);
170  }
171  if (available_vfp_ != NULL) {
172    available_vfp_->SetList(0);
173  }
174}
175
176
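// A label's checkpoint is the earliest position at which one of its pending
// branches would go out of range; the pool tracks these so that veneers can
// be emitted in time.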
177void VeneerPoolManager::AddLabel(Label* label) {
178  if (!label->IsInVeneerPool()) {
179    label->SetVeneerPoolManager(this);
180    labels_.push_back(label);
181  }
182  Label::ForwardReference& back = label->GetBackForwardRef();
183  back.SetIsBranch();
184  label->UpdateCheckpoint();
185  Label::Offset tmp = label->GetCheckpoint();
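  // The pool's checkpoint is the minimum checkpoint over all labels it holds,
  // i.e. the earliest position at which a veneer might have to be emitted.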
186  if (checkpoint_ > tmp) {
187    checkpoint_ = tmp;
188    masm_->ComputeCheckpoint();
189  }
190}
191
192
193void VeneerPoolManager::RemoveLabel(Label* label) {
194  label->ClearVeneerPoolManager();
195  if (label->GetCheckpoint() == checkpoint_) {
196    // We have to compute checkpoint again.
197    checkpoint_ = Label::kMaxOffset;
198    for (std::list<Label*>::iterator it = labels_.begin();
199         it != labels_.end();) {
200      if (*it == label) {
201        it = labels_.erase(it);
202      } else {
203        checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
204        ++it;
205      }
206    }
207    masm_->ComputeCheckpoint();
208  } else {
209    // We only have to remove the label from the list.
210    for (std::list<Label*>::iterator it = labels_.begin();; ++it) {
211      VIXL_ASSERT(it != labels_.end());
212      if (*it == label) {
213        labels_.erase(it);
214        break;
215      }
216    }
217  }
218}
219
220
221void VeneerPoolManager::Emit(Label::Offset target) {
222  checkpoint_ = Label::kMaxOffset;
223  // Sort the labels by checkpoint so that no veneer
224  // ends up out of range.
225  labels_.sort(Label::CompareLabels);
226  // To avoid too many veneers, generate veneers which will be necessary soon.
227  static const size_t kVeneerEmissionMargin = 1 * KBytes;
228  // To avoid too many veneers, also redirect branches whose deadline falls
229  // within this margin to the veneers generated here.
230  static const size_t kVeneerEmittedMargin = 2 * KBytes;
231  Label::Offset emitted_target = target + kVeneerEmittedMargin;
232  target += kVeneerEmissionMargin;
233  // Reset the checkpoint. It will be computed again in the loop.
234  checkpoint_ = Label::kMaxOffset;
235  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();) {
236    // The labels are sorted. As soon as a veneer is not needed, we can stop.
237    if ((*it)->GetCheckpoint() > target) {
238      checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
239      break;
240    }
241    // Define the veneer.
242    Label veneer;
243    masm_->Bind(&veneer);
244    Label::Offset label_checkpoint = Label::kMaxOffset;
245    // Check all uses of this label.
246    for (Label::ForwardRefList::iterator ref = (*it)->GetFirstForwardRef();
247         ref != (*it)->GetEndForwardRef();) {
248      if (ref->IsBranch()) {
249        if (ref->GetCheckpoint() <= emitted_target) {
250          // Use the veneer.
251          masm_->EncodeLabelFor(*ref, &veneer);
252          ref = (*it)->Erase(ref);
253        } else {
254          // Don't use the veneer => update checkpoint.
255          label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
256          ++ref;
257        }
258      } else {
259        ++ref;
260      }
261    }
262    // Even if this label no longer has any uses, keep it in the list; the
263    // "B" generated just below will add a new reference to it anyway.
264    (*it)->SetCheckpoint(label_checkpoint);
265    checkpoint_ = std::min(checkpoint_, label_checkpoint);
266    // Generate the veneer.
267    masm_->B(*it);
268    ++it;
269  }
270#ifdef VIXL_DEBUG
271  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();
272       ++it) {
273    VIXL_ASSERT((*it)->GetCheckpoint() >= checkpoint_);
274  }
275#endif
276  masm_->ComputeCheckpoint();
277}
278
279
280void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
281  EmitOption option = kBranchRequired;
282  Label after_pools;
283  if (target > veneer_pool_manager_.GetCheckpoint()) {
284#ifdef VIXL_DEBUG
285    // Here, we can't use an AssemblerAccurateScope as it would call
286    // PerformEnsureEmit in an infinite loop.
287    bool save_assembler_state = AllowAssembler();
288    SetAllowAssembler(true);
289#endif
290    GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
291    b(&after_pools);
292#ifdef VIXL_DEBUG
293    SetAllowAssembler(false);
294#endif
295    veneer_pool_manager_.Emit(target);
296    option = kNoBranchRequired;
297#ifdef VIXL_DEBUG
298    SetAllowAssembler(save_assembler_state);
299#endif
300  }
301  // Check if the macro-assembler's internal literal pool should be emitted
302  // to avoid any overflow. If we already generated the veneers, we can
303  // emit the pool (the branch is already done).
304  VIXL_ASSERT(GetCursorOffset() <= literal_pool_manager_.GetCheckpoint());
305  if ((target > literal_pool_manager_.GetCheckpoint()) ||
306      (option == kNoBranchRequired)) {
307    // We will generate the literal pool. Generate all the veneers which
308    // would become out of range.
309    size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize();
310    VIXL_ASSERT(IsInt32(literal_pool_size));
311    Label::Offset veneers_target =
312        target + static_cast<Label::Offset>(literal_pool_size);
313    VIXL_ASSERT(veneers_target >= 0);
314    if (veneers_target >= veneer_pool_manager_.GetCheckpoint()) {
315      veneer_pool_manager_.Emit(veneers_target);
316    }
317    EmitLiteralPool(option);
318  }
319  BindHelper(&after_pools);
320  if (GetBuffer()->IsManaged()) {
321    bool grow_requested;
322    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
323    if (grow_requested) ComputeCheckpoint();
324  }
325}
326
327
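// Recompute the position at which the pools must next be checked: the nearest
// of the veneer pool checkpoint, the literal pool checkpoint (with room for
// the veneers and a branch over the pool) and the end of the buffer.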
328void MacroAssembler::ComputeCheckpoint() {
329  checkpoint_ = veneer_pool_manager_.GetCheckpoint();
330  if (literal_pool_manager_.GetCheckpoint() != Label::kMaxOffset) {
331    size_t veneer_max_size = veneer_pool_manager_.GetMaxSize();
332    VIXL_ASSERT(IsInt32(veneer_max_size));
333    // We must be able to generate the pool and a branch over the pool.
334    Label::Offset tmp = literal_pool_manager_.GetCheckpoint() -
335                        static_cast<Label::Offset>(veneer_max_size +
336                                                   kMaxInstructionSizeInBytes);
337    VIXL_ASSERT(tmp >= 0);
338    checkpoint_ = std::min(checkpoint_, tmp);
339  }
340  size_t buffer_size = GetBuffer()->GetCapacity();
341  VIXL_ASSERT(IsInt32(buffer_size));
342  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
343  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
344}
345
346
347void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
348  // 32-bit table A32:
349  // adr ip, table
350  // add ip, ip, r1, lsl 2
351  // ldr ip, [ip]
352  // jmp: add pc, pc, ip, lsl 2
353  // table:
354  // .int (case_0 - (jmp + 8)) >> 2
355  // .int (case_1 - (jmp + 8)) >> 2
356  // .int (case_2 - (jmp + 8)) >> 2
357
358  // 16-bit table T32:
359  // adr ip, table
360  // jmp: tbh ip, r1
361  // table:
362  // .short (case_0 - (jmp + 4)) >> 1
363  // .short (case_1 - (jmp + 4)) >> 1
364  // .short (case_2 - (jmp + 4)) >> 1
365  // case_0:
366  //   ...
367  //   b end_switch
368  // case_1:
369  //   ...
370  //   b end_switch
371  // ...
372  // end_switch:
373  Label jump_table;
374  UseScratchRegisterScope temps(this);
375  Register scratch = temps.Acquire();
376  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
377
378  // Jump to the default label if reg is not in [0, table->GetLength()).
379  Cmp(reg, table->GetLength());
380  B(ge, table->GetDefaultLabel());
381
382  Adr(scratch, &jump_table);
383  if (IsUsingA32()) {
384    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
385    switch (table->GetOffsetShift()) {
386      case 0:
387        Ldrb(scratch, MemOperand(scratch));
388        break;
389      case 1:
390        Ldrh(scratch, MemOperand(scratch));
391        break;
392      case 2:
393        Ldr(scratch, MemOperand(scratch));
394        break;
395      default:
396        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
397    }
398    // Emit whatever needs to be emitted so that we can
399    // correctly record the position of the branch instruction.
400    uint32_t branch_location = GetCursorOffset();
401    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
402    AssemblerAccurateScope scope(this,
403                                 table_size + kA32InstructionSizeInBytes,
404                                 CodeBufferCheckScope::kMaximumSize);
405    add(pc, pc, Operand(scratch, LSL, 2));
406    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
407    bind(&jump_table);
408    GenerateSwitchTable(table, table_size);
409  } else {
410    // Thumb mode - we have tbb and tbh to do this for 8-bit or 16-bit offsets.
411    // For 32-bit offsets, we use the same sequence as for A32.
412    if (table->GetOffsetShift() == 2) {
413      // 32bit offsets
414      Add(scratch, scratch, Operand(reg, LSL, 2));
415      Ldr(scratch, MemOperand(scratch));
416      // Cannot use add pc, pc, r lsl 1 as this is unpredictable in T32,
417      // so do the shift beforehand.
418      Lsl(scratch, scratch, 1);
419      // Emit whatever needs to be emitted so that we can
420      // correctly record the position of the branch instruction.
421      uint32_t branch_location = GetCursorOffset();
422      table->SetBranchLocation(branch_location +
423                               GetArchitectureStatePCOffset());
424      AssemblerAccurateScope scope(this,
425                                   table_size + kMaxInstructionSizeInBytes,
426                                   CodeBufferCheckScope::kMaximumSize);
427      add(pc, pc, scratch);
428      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
429      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
430      bind(&jump_table);
431      GenerateSwitchTable(table, table_size);
432    } else {
433      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
434                  (table->GetOffsetShift() == 1));
435      // Emit whatever needs to be emitted so that we can
436      // correctly record the position of the branch instruction.
437      uint32_t branch_location = GetCursorOffset();
438      table->SetBranchLocation(branch_location +
439                               GetArchitectureStatePCOffset());
440      AssemblerAccurateScope scope(this,
441                                   table_size + kMaxInstructionSizeInBytes,
442                                   CodeBufferCheckScope::kMaximumSize);
443      if (table->GetOffsetShift() == 0) {
444        // 8bit offsets
445        tbb(scratch, reg);
446      } else {
447        // 16bit offsets
448        tbh(scratch, reg);
449      }
450      // tbb/tbh is a 32bit instruction
451      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
452      bind(&jump_table);
453      GenerateSwitchTable(table, table_size);
454    }
455  }
456}
457
458
459void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
460  table->BindTable(GetCursorOffset());
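  // Reserve zero-filled placeholder words for the table; the actual offsets
  // are written in when the cases are linked and the table is finalised.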
461  for (int i = 0; i < table_size / 4; i++) {
462    GetBuffer()->Emit32(0);
463  }
464}
465
466
467// switch/case/default : case
468// case_index is assumed to be < table->GetLength()
469// which is checked in JumpTable::Link and Table::SetPresenceBit
470void MacroAssembler::Case(JumpTableBase* table, int case_index) {
471  table->Link(this, case_index, GetCursorOffset());
472  table->SetPresenceBitForCase(case_index);
473}
474
475// switch/case/default : default
476void MacroAssembler::Default(JumpTableBase* table) {
477  Bind(table->GetDefaultLabel());
478}
479
480// switch/case/default : break
481void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
482
483// switch/case/default : finalize
484// Manage the default path, mostly. All empty offsets in the jump table
485// will point to default.
486// All values not in [0, table->GetLength()) are already pointing here anyway.
487void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
488
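// Materialise an out-of-range immediate into `tmp`: a single mov suffices for
// 16-bit values, a mvn is used when the inverted value is encodable, and a
// mov/movt pair is generated otherwise.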
489void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
490                                                Register tmp,
491                                                uint32_t imm) {
492  if (IsUintN(16, imm)) {
493    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
494    mov(cond, tmp, imm & 0xffff);
495    return;
496  }
497  if (IsUsingT32()) {
498    if (ImmediateT32::IsImmediateT32(~imm)) {
499      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
500      mvn(cond, tmp, ~imm);
501      return;
502    }
503  } else {
504    if (ImmediateA32::IsImmediateA32(~imm)) {
505      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
506      mvn(cond, tmp, ~imm);
507      return;
508    }
509  }
510  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
511  mov(cond, tmp, imm & 0xffff);
512  movt(cond, tmp, imm >> 16);
513}
514
515
516void MacroAssembler::PadToMinimumBranchRange(Label* label) {
517  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
518  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
519    uint32_t location = last_reference->GetLocation();
520    if (location + k16BitT32InstructionSizeInBytes ==
521        static_cast<uint32_t>(GetCursorOffset())) {
522      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
523      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
524        VIXL_ASSERT(!InITBlock());
525        // A Cbz or a Cbnz can't jump immediately after the instruction. If the
526        // target is immediately after the Cbz or Cbnz, we insert a nop to
527        // avoid that.
528        EmitT32_16(k16BitT32NopOpcode);
529      }
530    }
531  }
532}
533
534
535HARDFLOAT void PrintfTrampolineRRRR(
536    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
537  printf(format, a, b, c, d);
538}
539
540
541HARDFLOAT void PrintfTrampolineRRRD(
542    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
543  printf(format, a, b, c, d);
544}
545
546
547HARDFLOAT void PrintfTrampolineRRDR(
548    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
549  printf(format, a, b, c, d);
550}
551
552
553HARDFLOAT void PrintfTrampolineRRDD(
554    const char* format, uint32_t a, uint32_t b, double c, double d) {
555  printf(format, a, b, c, d);
556}
557
558
559HARDFLOAT void PrintfTrampolineRDRR(
560    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
561  printf(format, a, b, c, d);
562}
563
564
565HARDFLOAT void PrintfTrampolineRDRD(
566    const char* format, uint32_t a, double b, uint32_t c, double d) {
567  printf(format, a, b, c, d);
568}
569
570
571HARDFLOAT void PrintfTrampolineRDDR(
572    const char* format, uint32_t a, double b, double c, uint32_t d) {
573  printf(format, a, b, c, d);
574}
575
576
577HARDFLOAT void PrintfTrampolineRDDD(
578    const char* format, uint32_t a, double b, double c, double d) {
579  printf(format, a, b, c, d);
580}
581
582
583HARDFLOAT void PrintfTrampolineDRRR(
584    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
585  printf(format, a, b, c, d);
586}
587
588
589HARDFLOAT void PrintfTrampolineDRRD(
590    const char* format, double a, uint32_t b, uint32_t c, double d) {
591  printf(format, a, b, c, d);
592}
593
594
595HARDFLOAT void PrintfTrampolineDRDR(
596    const char* format, double a, uint32_t b, double c, uint32_t d) {
597  printf(format, a, b, c, d);
598}
599
600
601HARDFLOAT void PrintfTrampolineDRDD(
602    const char* format, double a, uint32_t b, double c, double d) {
603  printf(format, a, b, c, d);
604}
605
606
607HARDFLOAT void PrintfTrampolineDDRR(
608    const char* format, double a, double b, uint32_t c, uint32_t d) {
609  printf(format, a, b, c, d);
610}
611
612
613HARDFLOAT void PrintfTrampolineDDRD(
614    const char* format, double a, double b, uint32_t c, double d) {
615  printf(format, a, b, c, d);
616}
617
618
619HARDFLOAT void PrintfTrampolineDDDR(
620    const char* format, double a, double b, double c, uint32_t d) {
621  printf(format, a, b, c, d);
622}
623
624
625HARDFLOAT void PrintfTrampolineDDDD(
626    const char* format, double a, double b, double c, double d) {
627  printf(format, a, b, c, d);
628}
629
630
631void MacroAssembler::Printf(const char* format,
632                            CPURegister reg1,
633                            CPURegister reg2,
634                            CPURegister reg3,
635                            CPURegister reg4) {
636  if (generate_simulator_code_) {
637    PushRegister(reg4);
638    PushRegister(reg3);
639    PushRegister(reg2);
640    PushRegister(reg1);
641    Push(RegisterList(r0, r1));
642    StringLiteral* format_literal =
643        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
644    Adr(r0, format_literal);
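    // Pack the type of each argument into r1, one nibble per argument with
    // reg1 in the least significant nibble, so that the simulated printf can
    // decode the values pushed on the stack above.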
645    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
646                    (reg2.GetType() << 4) | reg1.GetType();
647    Mov(r1, args);
648    Hvc(kPrintfCode);
649    Pop(RegisterList(r0, r1));
650    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
651               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
652    Drop(size);
653  } else {
654    // Generate on a native platform => 32 bit environment.
655    // Preserve the caller-saved core registers (r0-r3, r12, r14) and r5.
656    const uint32_t saved_registers_mask =
657        kCallerSavedRegistersMask | (1 << r5.GetCode());
658    Push(RegisterList(saved_registers_mask));
659    // Push VFP registers.
660    Vpush(Untyped64, DRegisterList(d0, 8));
661    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
662    // Find a register which has been saved and which doesn't need to be
663    // printed.
664    RegisterList available_registers(kCallerSavedRegistersMask);
665    if (reg1.GetType() == CPURegister::kRRegister) {
666      available_registers.Remove(Register(reg1.GetCode()));
667    }
668    if (reg2.GetType() == CPURegister::kRRegister) {
669      available_registers.Remove(Register(reg2.GetCode()));
670    }
671    if (reg3.GetType() == CPURegister::kRRegister) {
672      available_registers.Remove(Register(reg3.GetCode()));
673    }
674    if (reg4.GetType() == CPURegister::kRRegister) {
675      available_registers.Remove(Register(reg4.GetCode()));
676    }
677    Register tmp = available_registers.GetFirstAvailableRegister();
678    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
679    // Push the flags.
680    Mrs(tmp, APSR);
681    Push(tmp);
682    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
683    Push(tmp);
684    // Push the registers to print on the stack.
685    PushRegister(reg4);
686    PushRegister(reg3);
687    PushRegister(reg2);
688    PushRegister(reg1);
689    int core_count = 1;
690    int vfp_count = 0;
691    uint32_t printf_type = 0;
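    // Bit n of printf_type is set when argument n+1 is a floating-point value;
    // it selects the matching trampoline in the switch below.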
692    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
693    // reg4 may stay on the stack if all of the registers to print are core
694    // registers.
695    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
696    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
697    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
698    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
699    // Ensure that the stack is aligned on 8 bytes.
700    And(r5, sp, 0x7);
701    if (core_count == 5) {
702      // One 32-bit argument (reg4) has been left on the stack => align the
703      // stack before the argument.
705      Pop(r0);
706      Sub(sp, sp, r5);
707      Push(r0);
708    } else {
709      Sub(sp, sp, r5);
710    }
711    // Select the right trampoline depending on the arguments.
712    uintptr_t address;
713    switch (printf_type) {
714      case 0:
715        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
716        break;
717      case 1:
718        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
719        break;
720      case 2:
721        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
722        break;
723      case 3:
724        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
725        break;
726      case 4:
727        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
728        break;
729      case 5:
730        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
731        break;
732      case 6:
733        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
734        break;
735      case 7:
736        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
737        break;
738      case 8:
739        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
740        break;
741      case 9:
742        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
743        break;
744      case 10:
745        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
746        break;
747      case 11:
748        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
749        break;
750      case 12:
751        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
752        break;
753      case 13:
754        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
755        break;
756      case 14:
757        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
758        break;
759      case 15:
760        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
761        break;
762      default:
763        VIXL_UNREACHABLE();
764        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
765        break;
766    }
767    StringLiteral* format_literal =
768        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
769    Adr(r0, format_literal);
770    Mov(ip, Operand::From(address));
771    Blx(ip);
772    // If register reg4 was left on the stack => skip it.
773    if (core_count == 5) Drop(kRegSizeInBytes);
774    // Restore the stack as it was before alignment.
775    Add(sp, sp, r5);
776    // Restore the flags.
777    Pop(tmp);
778    Vmsr(FPSCR, tmp);
779    Pop(tmp);
780    Msr(APSR_nzcvqg, tmp);
781    // Restore the registers.
782    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
783    Vpop(Untyped64, DRegisterList(d0, 8));
784    Pop(RegisterList(saved_registers_mask));
785  }
786}
787
788
789void MacroAssembler::PushRegister(CPURegister reg) {
790  switch (reg.GetType()) {
791    case CPURegister::kNoRegister:
792      break;
793    case CPURegister::kRRegister:
794      Push(Register(reg.GetCode()));
795      break;
796    case CPURegister::kSRegister:
797      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
798      break;
799    case CPURegister::kDRegister:
800      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
801      break;
802    case CPURegister::kQRegister:
803      VIXL_UNIMPLEMENTED();
804      break;
805  }
806}
807
808
809void MacroAssembler::PreparePrintfArgument(CPURegister reg,
810                                           int* core_count,
811                                           int* vfp_count,
812                                           uint32_t* printf_type) {
813  switch (reg.GetType()) {
814    case CPURegister::kNoRegister:
815      break;
816    case CPURegister::kRRegister:
817      VIXL_ASSERT(*core_count <= 4);
818      if (*core_count < 4) Pop(Register(*core_count));
819      *core_count += 1;
820      break;
821    case CPURegister::kSRegister:
822      VIXL_ASSERT(*vfp_count < 4);
823      *printf_type |= 1 << (*core_count + *vfp_count - 1);
824      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
825      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
826      *vfp_count += 1;
827      break;
828    case CPURegister::kDRegister:
829      VIXL_ASSERT(*vfp_count < 4);
830      *printf_type |= 1 << (*core_count + *vfp_count - 1);
831      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
832      *vfp_count += 1;
833      break;
834    case CPURegister::kQRegister:
835      VIXL_UNIMPLEMENTED();
836      break;
837  }
838}
839
840
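// The Delegate methods are called by the Assembler when it cannot encode the
// requested operands directly. The MacroAssembler rewrites the operation
// using scratch registers or an equivalent instruction sequence, and falls
// back to Assembler::Delegate otherwise.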
841void MacroAssembler::Delegate(InstructionType type,
842                              InstructionCondROp instruction,
843                              Condition cond,
844                              Register rn,
845                              const Operand& operand) {
846  // movt, sxtb16, teq, uxtb16
847  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
848              (type == kUxtb16));
849
850  if (type == kMovt) {
851    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.");
852  }
853
854  // This delegate only supports teq with immediates.
855  CONTEXT_SCOPE;
856  if ((type == kTeq) && operand.IsImmediate()) {
857    UseScratchRegisterScope temps(this);
858    Register scratch = temps.Acquire();
859    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
860    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
861    teq(cond, rn, scratch);
862    return;
863  }
864  Assembler::Delegate(type, instruction, cond, rn, operand);
865}
866
867
868void MacroAssembler::Delegate(InstructionType type,
869                              InstructionCondSizeROp instruction,
870                              Condition cond,
871                              EncodingSize size,
872                              Register rn,
873                              const Operand& operand) {
874  // cmn cmp mov movs mvn mvns sxtb sxth tst uxtb uxth
875  CONTEXT_SCOPE;
876  VIXL_ASSERT(size.IsBest());
877  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
878              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
879              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
880              (type == kUxtb) || (type == kUxth));
881  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
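    // T32 does not support register-shifted register operands, so perform the
    // shift into a scratch register first.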
882    VIXL_ASSERT((type != kMov) || (type != kMovs));
883    InstructionCondRROp shiftop = NULL;
884    switch (operand.GetShift().GetType()) {
885      case LSL:
886        shiftop = &Assembler::lsl;
887        break;
888      case LSR:
889        shiftop = &Assembler::lsr;
890        break;
891      case ASR:
892        shiftop = &Assembler::asr;
893        break;
894      case RRX:
895        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
896        VIXL_UNREACHABLE();
897        break;
898      case ROR:
899        shiftop = &Assembler::ror;
900        break;
901      default:
902        VIXL_UNREACHABLE();
903    }
904    if (shiftop != NULL) {
905      UseScratchRegisterScope temps(this);
906      Register scratch = temps.Acquire();
907      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
908      (this->*shiftop)(cond,
909                       scratch,
910                       operand.GetBaseRegister(),
911                       operand.GetShiftRegister());
912      (this->*instruction)(cond, size, rn, scratch);
913      return;
914    }
915  }
916  if (operand.IsImmediate()) {
917    uint32_t imm = operand.GetImmediate();
918    switch (type) {
919      case kMov:
920      case kMovs:
921        if (!rn.IsPC()) {
922          // Immediate is too large, but not using PC, so handle with mov{t}.
923          HandleOutOfBoundsImmediate(cond, rn, imm);
924          if (type == kMovs) {
925            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
926            tst(cond, rn, rn);
927          }
928          return;
929        } else if (type == kMov) {
930          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
931          // Immediate is too large and using PC, so handle using a temporary
932          // register.
933          UseScratchRegisterScope temps(this);
934          Register scratch = temps.Acquire();
935          HandleOutOfBoundsImmediate(al, scratch, imm);
936          EnsureEmitFor(kMaxInstructionSizeInBytes);
937          bx(cond, scratch);
938          return;
939        }
940        break;
941      case kCmn:
942      case kCmp:
943        if (IsUsingA32() || !rn.IsPC()) {
944          UseScratchRegisterScope temps(this);
945          Register scratch = temps.Acquire();
946          HandleOutOfBoundsImmediate(cond, scratch, imm);
947          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
948          (this->*instruction)(cond, size, rn, scratch);
949          return;
950        }
951        break;
952      case kMvn:
953      case kMvns:
954        if (!rn.IsPC()) {
955          UseScratchRegisterScope temps(this);
956          Register scratch = temps.Acquire();
957          HandleOutOfBoundsImmediate(cond, scratch, imm);
958          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
959          (this->*instruction)(cond, size, rn, scratch);
960          return;
961        }
962        break;
963      case kTst:
964        if (IsUsingA32() || !rn.IsPC()) {
965          UseScratchRegisterScope temps(this);
966          Register scratch = temps.Acquire();
967          HandleOutOfBoundsImmediate(cond, scratch, imm);
968          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
969          (this->*instruction)(cond, size, rn, scratch);
970          return;
971        }
972        break;
973      default:  // kSxtb, kSxth, kUxtb, kUxth
974        break;
975    }
976  }
977  Assembler::Delegate(type, instruction, cond, size, rn, operand);
978}
979
980
981void MacroAssembler::Delegate(InstructionType type,
982                              InstructionCondRROp instruction,
983                              Condition cond,
984                              Register rd,
985                              Register rn,
986                              const Operand& operand) {
987  // orn orns pkhbt pkhtb rsc rscs sxtab sxtab16 sxtah uxtab uxtab16 uxtah
988
989  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
990      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
991      (type == kPkhbt) || (type == kPkhtb)) {
992    UnimplementedDelegate(type);
993    return;
994  }
995
996  // This delegate only handles the following instructions.
997  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
998              (type == kRscs));
999  CONTEXT_SCOPE;
1000
1001  // T32 does not support register-shifted register operands, so emulate them.
1002  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1003    InstructionCondRROp shiftop = NULL;
1004    switch (operand.GetShift().GetType()) {
1005      case LSL:
1006        shiftop = &Assembler::lsl;
1007        break;
1008      case LSR:
1009        shiftop = &Assembler::lsr;
1010        break;
1011      case ASR:
1012        shiftop = &Assembler::asr;
1013        break;
1014      case RRX:
1015        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1016        VIXL_UNREACHABLE();
1017        break;
1018      case ROR:
1019        shiftop = &Assembler::ror;
1020        break;
1021      default:
1022        VIXL_UNREACHABLE();
1023    }
1024    if (shiftop != NULL) {
1025      UseScratchRegisterScope temps(this);
1026      Register rm = operand.GetBaseRegister();
1027      Register rs = operand.GetShiftRegister();
1028      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1029      // a scratch register.
1030      if (!rd.Is(rn)) temps.Include(rd);
1031      if (!rm.Is(rn)) temps.Include(rm);
1032      if (!rs.Is(rn)) temps.Include(rs);
1033      Register scratch = temps.Acquire();
1034      // TODO: The scope length was measured empirically. We should analyse the
1035      // worst-case size and add targeted tests.
1036      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1037      (this->*shiftop)(cond, scratch, rm, rs);
1038      (this->*instruction)(cond, rd, rn, scratch);
1039      return;
1040    }
1041  }
1042
1043  // T32 does not have an Rsc instruction, so negate the lhs input and turn it
1044  // into an Adc. Adc and Rsc are equivalent using a bitwise NOT:
1045  //   rsc rd, rn, operand <-> adc rd, NOT(rn), operand
1046  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1047    // The RegisterShiftedRegister case should have been handled above.
1048    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1049    UseScratchRegisterScope temps(this);
1050    Register negated_rn;
1051    if (operand.IsImmediate() || !operand.GetBaseRegister().Is(rn)) {
1052      // In this case, we can just negate `rn` instead of using a temporary
1053      // register.
1054      negated_rn = rn;
1055    } else {
1056      if (!rd.Is(rn)) temps.Include(rd);
1057      negated_rn = temps.Acquire();
1058    }
1059    {
1060      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1061      mvn(cond, negated_rn, rn);
1062    }
1063    if (type == kRsc) {
1064      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1065      adc(cond, rd, negated_rn, operand);
1066      return;
1067    }
1068    // TODO: We shouldn't have to specify how much space the next instruction
1069    // needs.
1070    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1071    adcs(cond, rd, negated_rn, operand);
1072    return;
1073  }
1074
1075  // A32 does not have an Orn instruction, so negate the rhs input and turn it
1076  // into an Orr.
1077  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1078    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
1079    //  mvn r0, r2
1080    //  orr r0, r1, r0
1081    Register scratch;
1082    UseScratchRegisterScope temps(this);
1083    // If different from `rn`, we can make use of source and destination
1084    // registers as a scratch register.
1085    if (!rd.Is(rn)) temps.Include(rd);
1086    if (!operand.IsImmediate() && !operand.GetBaseRegister().Is(rn)) {
1087      temps.Include(operand.GetBaseRegister());
1088    }
1089    if (operand.IsRegisterShiftedRegister() &&
1090        !operand.GetShiftRegister().Is(rn)) {
1091      temps.Include(operand.GetShiftRegister());
1092    }
1093    scratch = temps.Acquire();
1094    {
1095      // TODO: We shouldn't have to specify how much space the next instruction
1096      // needs.
1097      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1098      mvn(cond, scratch, operand);
1099    }
1100    if (type == kOrns) {
1101      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1102      orrs(cond, rd, rn, scratch);
1103      return;
1104    }
1105    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1106    orr(cond, rd, rn, scratch);
1107    return;
1108  }
1109  if (operand.IsImmediate()) {
1110    int32_t imm = operand.GetSignedImmediate();
1111
1112    // If the immediate can be encoded when inverted, turn Orn into Orr.
1113    // Otherwise rely on HandleOutOfBoundsImmediate to materialise the
1114    // immediate into a scratch register.
1115    if (IsUsingT32() && ((type == kOrn) || (type == kOrns)) &&
1116        ImmediateT32::IsImmediateT32(~imm)) {
1117      VIXL_ASSERT((type == kOrn) || (type == kOrns));
1118      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1119      switch (type) {
1120        case kOrn:
1121          orr(cond, rd, rn, ~imm);
1122          return;
1123        case kOrns:
1124          orrs(cond, rd, rn, ~imm);
1125          return;
1126        default:
1127          VIXL_UNREACHABLE();
1128          break;
1129      }
1130    } else {
1131      UseScratchRegisterScope temps(this);
1132      // Allow using the destination as a scratch register if possible.
1133      if (!rd.Is(rn)) temps.Include(rd);
1134      Register scratch = temps.Acquire();
1135      HandleOutOfBoundsImmediate(cond, scratch, imm);
1136      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1137      (this->*instruction)(cond, rd, rn, scratch);
1138      return;
1139    }
1140  }
1141  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1142}
1143
1144
1145void MacroAssembler::Delegate(InstructionType type,
1146                              InstructionCondSizeRROp instruction,
1147                              Condition cond,
1148                              EncodingSize size,
1149                              Register rd,
1150                              Register rn,
1151                              const Operand& operand) {
1152  // adc adcs add adds and_ ands asr asrs bic bics eor eors lsl lsls lsr lsrs
1153  // orr orrs ror rors rsb rsbs sbc sbcs sub subs
1154
1155  VIXL_ASSERT(
1156      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1157      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1158      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1159      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1160      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1161      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1162      (type == kSub) || (type == kSubs));
1163
1164  CONTEXT_SCOPE;
1165  VIXL_ASSERT(size.IsBest());
1166  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1167    InstructionCondRROp shiftop = NULL;
1168    switch (operand.GetShift().GetType()) {
1169      case LSL:
1170        shiftop = &Assembler::lsl;
1171        break;
1172      case LSR:
1173        shiftop = &Assembler::lsr;
1174        break;
1175      case ASR:
1176        shiftop = &Assembler::asr;
1177        break;
1178      case RRX:
1179        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1180        VIXL_UNREACHABLE();
1181        break;
1182      case ROR:
1183        shiftop = &Assembler::ror;
1184        break;
1185      default:
1186        VIXL_UNREACHABLE();
1187    }
1188    if (shiftop != NULL) {
1189      UseScratchRegisterScope temps(this);
1190      Register rm = operand.GetBaseRegister();
1191      Register rs = operand.GetShiftRegister();
1192      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1193      // a scratch register.
1194      if (!rd.Is(rn)) temps.Include(rd);
1195      if (!rm.Is(rn)) temps.Include(rm);
1196      if (!rs.Is(rn)) temps.Include(rs);
1197      Register scratch = temps.Acquire();
1198      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1199      (this->*shiftop)(cond, scratch, rm, rs);
1200      (this->*instruction)(cond, size, rd, rn, scratch);
1201      return;
1202    }
1203  }
1204  if (operand.IsImmediate()) {
1205    int32_t imm = operand.GetSignedImmediate();
1206    if (ImmediateT32::IsImmediateT32(~imm)) {
1207      if (IsUsingT32()) {
1208        switch (type) {
1209          case kOrr:
1210            orn(cond, rd, rn, ~imm);
1211            return;
1212          case kOrrs:
1213            orns(cond, rd, rn, ~imm);
1214            return;
1215          default:
1216            break;
1217        }
1218      }
1219    }
1220    if (imm < 0) {
1221      InstructionCondSizeRROp asmcb = NULL;
1222      // Add and sub are equivalent using an arithmetic negation:
1223      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1224      // Add and sub with carry are equivalent using a bitwise NOT:
1225      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
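      // For example, add rd, rn, #-4 is emitted here as sub rd, rn, #4.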
1226      switch (type) {
1227        case kAdd:
1228          asmcb = &Assembler::sub;
1229          imm = -imm;
1230          break;
1231        case kAdds:
1232          asmcb = &Assembler::subs;
1233          imm = -imm;
1234          break;
1235        case kSub:
1236          asmcb = &Assembler::add;
1237          imm = -imm;
1238          break;
1239        case kSubs:
1240          asmcb = &Assembler::adds;
1241          imm = -imm;
1242          break;
1243        case kAdc:
1244          asmcb = &Assembler::sbc;
1245          imm = ~imm;
1246          break;
1247        case kAdcs:
1248          asmcb = &Assembler::sbcs;
1249          imm = ~imm;
1250          break;
1251        case kSbc:
1252          asmcb = &Assembler::adc;
1253          imm = ~imm;
1254          break;
1255        case kSbcs:
1256          asmcb = &Assembler::adcs;
1257          imm = ~imm;
1258          break;
1259        default:
1260          break;
1261      }
1262      if (asmcb != NULL) {
1263        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1264        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1265        return;
1266      }
1267    }
1268    UseScratchRegisterScope temps(this);
1269    // Allow using the destination as a scratch register if possible.
1270    if (!rd.Is(rn)) temps.Include(rd);
1271    Register scratch = temps.Acquire();
1272    // TODO: The scope length was measured empirically. We should analyse the
1273    // worst-case size and add targeted tests.
1274    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1275    mov(cond, scratch, operand.GetImmediate());
1276    (this->*instruction)(cond, size, rd, rn, scratch);
1277    return;
1278  }
1279  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1280}
1281
1282
1283void MacroAssembler::Delegate(InstructionType type,
1284                              InstructionRL instruction,
1285                              Register rn,
1286                              Label* label) {
1287  // cbz cbnz
1288  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1289
1290  CONTEXT_SCOPE;
1291  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1292  if (IsUsingA32()) {
1293    if (type == kCbz) {
1294      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1295    } else {
1296      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1297    }
1298  } else if (rn.IsLow()) {
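    // The branch cannot be encoded as a cbz/cbnz (e.g. the target is out of
    // range), so invert the compare-and-branch and use a plain b instead.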
1299    switch (type) {
1300      case kCbnz: {
1301        Label done;
1302        cbz(rn, &done);
1303        b(label);
1304        Bind(&done);
1305        return;
1306      }
1307      case kCbz: {
1308        Label done;
1309        cbnz(rn, &done);
1310        b(label);
1311        Bind(&done);
1312        return;
1313      }
1314      default:
1315        break;
1316    }
1317  }
1318  Assembler::Delegate(type, instruction, rn, label);
1319}
1320
1321
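// Return true if every byte of `imm` is either 0x00 or 0xff (e.g. 0xff0000ff),
// i.e. the byte-mask pattern accepted by the vmov.i64 immediate encoding.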
1322template <typename T>
1323static inline bool IsI64BitPattern(T imm) {
1324  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1325    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1326  }
1327  return true;
1328}
1329
1330
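// Return true if all bytes of `imm` are equal (e.g. 0xabababab), so that the
// value can be materialised with a single vmov.i8.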
1331template <typename T>
1332static inline bool IsI8BitPattern(T imm) {
1333  uint8_t imm8 = imm & 0xff;
1334  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1335    imm >>= 8;
1336    if ((imm & 0xff) != imm8) return false;
1337  }
1338  return true;
1339}
1340
1341
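// Return true if `imm32` matches one of the patterns below, in which case it
// can be materialised with a vmvn.i32 of its bitwise inverse.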
1342static inline bool CanBeInverted(uint32_t imm32) {
1343  uint32_t fill8 = 0;
1344
1345  if ((imm32 & 0xffffff00) == 0xffffff00) {
1346    //    11111111 11111111 11111111 abcdefgh
1347    return true;
1348  }
1349  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1350    fill8 = imm32 & 0xff;
1351    imm32 >>= 8;
1352    if ((imm32 >> 8) == 0xffff) {
1353      //    11111111 11111111 abcdefgh 00000000
1354      // or 11111111 11111111 abcdefgh 11111111
1355      return true;
1356    }
1357    if ((imm32 & 0xff) == fill8) {
1358      imm32 >>= 8;
1359      if ((imm32 >> 8) == 0xff) {
1360        //    11111111 abcdefgh 00000000 00000000
1361        // or 11111111 abcdefgh 11111111 11111111
1362        return true;
1363      }
1364      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1365        //    abcdefgh 11111111 11111111 11111111
1366        return true;
1367      }
1368    }
1369  }
1370  return false;
1371}
1372
1373
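// Replicate `imm` across the wider type RES, e.g.
// replicate<uint64_t>(0xff0000ff) == 0xff0000ffff0000ff.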
1374template <typename RES, typename T>
1375static inline RES replicate(T imm) {
1376  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1377              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1378  RES res = imm;
1379  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1380    res = (res << (sizeof(T) * 8)) | imm;
1381  }
1382  return res;
1383}
1384
1385
1386void MacroAssembler::Delegate(InstructionType type,
1387                              InstructionCondDtSSop instruction,
1388                              Condition cond,
1389                              DataType dt,
1390                              SRegister rd,
1391                              const SOperand& operand) {
1392  CONTEXT_SCOPE;
1393  if (type == kVmov) {
1394    if (operand.IsImmediate() && dt.Is(F32)) {
1395      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1396      if (neon_imm.CanConvert<float>()) {
1397        // movw ip, imm16
1398        // movt ip, imm16
1399        // vmov s0, ip
1400        UseScratchRegisterScope temps(this);
1401        Register scratch = temps.Acquire();
1402        float f = neon_imm.GetImmediate<float>();
1403        // TODO: The scope length was measured empirically.
1404        // We should analyse the worst-case size
1405        // and add targeted tests.
1406        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1407        mov(cond, scratch, FloatToRawbits(f));
1408        vmov(cond, rd, scratch);
1409        return;
1410      }
1411    }
1412  }
1413  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1414}
1415
1416
1417void MacroAssembler::Delegate(InstructionType type,
1418                              InstructionCondDtDDop instruction,
1419                              Condition cond,
1420                              DataType dt,
1421                              DRegister rd,
1422                              const DOperand& operand) {
1423  CONTEXT_SCOPE;
1424  if (type == kVmov) {
1425    if (operand.IsImmediate()) {
1426      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1427      switch (dt.GetValue()) {
1428        case I32:
1429          if (neon_imm.CanConvert<uint32_t>()) {
1430            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1431            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1432            if (IsI8BitPattern(imm)) {
1433              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1434              vmov(cond, I8, rd, imm & 0xff);
1435              return;
1436            }
1437            // vmov.i32 d0, 0xff0000ff will translate into
1438            // vmov.i64 d0, 0xff0000ffff0000ff
1439            if (IsI64BitPattern(imm)) {
1440              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1441              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1442              return;
1443            }
1444            // vmov.i32 d0, 0xffab0000 will translate into
1445            // vmvn.i32 d0, 0x0054ffff
1446            if (cond.Is(al) && CanBeInverted(imm)) {
1447              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1448              vmvn(I32, rd, ~imm);
1449              return;
1450            }
1451          }
1452          break;
1453        case I16:
1454          if (neon_imm.CanConvert<uint16_t>()) {
1455            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1456            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1457            if (IsI8BitPattern(imm)) {
1458              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1459              vmov(cond, I8, rd, imm & 0xff);
1460              return;
1461            }
1462          }
1463          break;
1464        case I64:
1465          if (neon_imm.CanConvert<uint64_t>()) {
1466            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1467            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1468            if (IsI8BitPattern(imm)) {
1469              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1470              vmov(cond, I8, rd, imm & 0xff);
1471              return;
1472            }
1473            // mov ip, lo(imm64)
1474            // vdup d0, ip
1475            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
1476            // preserved
1477            {
1478              UseScratchRegisterScope temps(this);
1479              Register scratch = temps.Acquire();
1480              {
1481                // TODO: The scope length was measured empirically.
1482                // We should analyse the worst-case size
1483                // and add targeted tests.
1484                CodeBufferCheckScope scope(this,
1485                                           2 * kMaxInstructionSizeInBytes);
1486                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1487              }
1488              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1489              vdup(cond, Untyped32, rd, scratch);
1490            }
1491            // mov ip, hi(imm64)
1492            // vmov d0[1], ip
1493            {
1494              UseScratchRegisterScope temps(this);
1495              Register scratch = temps.Acquire();
1496              {
1497                // TODO: The scope length was measured empirically.
1498                // We should analyse the worst-case size
1499                // and add targeted tests.
1500                CodeBufferCheckScope scope(this,
1501                                           2 * kMaxInstructionSizeInBytes);
1502                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1503              }
1504              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1505              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1506            }
1507            return;
1508          }
1509          break;
1510        default:
1511          break;
1512      }
1513      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1514          neon_imm.CanConvert<uint32_t>()) {
1515        // mov ip, imm32
1516        // vdup.<dt> d0, ip
1517        UseScratchRegisterScope temps(this);
1518        Register scratch = temps.Acquire();
1519        {
1520          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1521          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1522        }
1523        DataTypeValue vdup_dt = Untyped32;
1524        switch (dt.GetValue()) {
1525          case I8:
1526            vdup_dt = Untyped8;
1527            break;
1528          case I16:
1529            vdup_dt = Untyped16;
1530            break;
1531          case I32:
1532            vdup_dt = Untyped32;
1533            break;
1534          default:
1535            VIXL_UNREACHABLE();
1536        }
1537        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1538        vdup(cond, vdup_dt, rd, scratch);
1539        return;
1540      }
1541      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1542        float f = neon_imm.GetImmediate<float>();
1543        // Punt to vmov.i32
1544        // TODO: The scope length was guessed based on the double case below. We
1545        // should analyse the worst-case size and add targeted tests.
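            // Each 32-bit lane ends up holding the raw bit pattern of the
            // requested float.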
1546        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1547        vmov(cond, I32, rd, FloatToRawbits(f));
1548        return;
1549      }
1550      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1551        // Punt to vmov.i64
1552        double d = neon_imm.GetImmediate<double>();
1553        // TODO: The scope length was measured empirically.
1554        // We should analyse the worst-case size and add
1555        // targeted tests.
1556        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1557        vmov(cond, I64, rd, DoubleToRawbits(d));
1558        return;
1559      }
1560    }
1561  }
1562  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1563}
1564
1565
1566void MacroAssembler::Delegate(InstructionType type,
1567                              InstructionCondDtQQop instruction,
1568                              Condition cond,
1569                              DataType dt,
1570                              QRegister rd,
1571                              const QOperand& operand) {
1572  CONTEXT_SCOPE;
1573  if (type == kVmov) {
1574    if (operand.IsImmediate()) {
1575      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1576      switch (dt.GetValue()) {
1577        case I32:
1578          if (neon_imm.CanConvert<uint32_t>()) {
1579            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1580            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1581            if (IsI8BitPattern(imm)) {
1582              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1583              vmov(cond, I8, rd, imm & 0xff);
1584              return;
1585            }
1586            // vmov.i32 d0, 0xff0000ff will translate into
1587            // vmov.i64 d0, 0xff0000ffff0000ff
1588            if (IsI64BitPattern(imm)) {
1589              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1590              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1591              return;
1592            }
1593            // vmov.i32 d0, 0xffab0000 will translate into
1594            // vmvn.i32 d0, 0x0054ffff
1595            if (CanBeInverted(imm)) {
1596              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1597              vmvn(cond, I32, rd, ~imm);
1598              return;
1599            }
1600          }
1601          break;
1602        case I16:
1603          if (neon_imm.CanConvert<uint16_t>()) {
1604            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1605            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1606            if (IsI8BitPattern(imm)) {
1607              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1608              vmov(cond, I8, rd, imm & 0xff);
1609              return;
1610            }
1611          }
1612          break;
1613        case I64:
1614          if (neon_imm.CanConvert<uint64_t>()) {
1615            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1616            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1617            if (IsI8BitPattern(imm)) {
1618              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1619              vmov(cond, I8, rd, imm & 0xff);
1620              return;
1621            }
1622            // mov ip, lo(imm64)
1623            // vdup q0, ip
1624            // vdup is preferred to 'vmov d0[0]' as the other lanes do not
1625            // need to be preserved.
1626            {
1627              UseScratchRegisterScope temps(this);
1628              Register scratch = temps.Acquire();
1629              {
1630                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1631                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1632              }
1633              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1634              vdup(cond, Untyped32, rd, scratch);
1635            }
1636            // mov ip, hi(imm64)
1637            // vmov.32 d0[1], ip
1638            // vmov d1, d0
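                // The final vmov copies the now complete low D register into
                // the high D register, so both halves of the Q register hold
                // imm64.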
1639            {
1640              UseScratchRegisterScope temps(this);
1641              Register scratch = temps.Acquire();
1642              {
1643                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1644                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1645              }
1646              {
1647                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1648                vmov(cond,
1649                     Untyped32,
1650                     DRegisterLane(rd.GetLowDRegister(), 1),
1651                     scratch);
1652              }
1653              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1654              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1655            }
1656            return;
1657          }
1658          break;
1659        default:
1660          break;
1661      }
1662      if ((dt.Is(I8) || dt.Is(I16) || dt.Is(I32)) &&
1663          neon_imm.CanConvert<uint32_t>()) {
1664        // mov ip, imm32
1665        // vdup.<dt> q0, ip
1666        UseScratchRegisterScope temps(this);
1667        Register scratch = temps.Acquire();
1668        {
1669          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1670          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1671        }
1672        DataTypeValue vdup_dt = Untyped32;
1673        switch (dt.GetValue()) {
1674          case I8:
1675            vdup_dt = Untyped8;
1676            break;
1677          case I16:
1678            vdup_dt = Untyped16;
1679            break;
1680          case I32:
1681            vdup_dt = Untyped32;
1682            break;
1683          default:
1684            VIXL_UNREACHABLE();
1685        }
1686        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1687        vdup(cond, vdup_dt, rd, scratch);
1688        return;
1689      }
1690      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1691        // Punt to vmov.i32
1692        float f = neon_imm.GetImmediate<float>();
1693        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1694        vmov(cond, I32, rd, FloatToRawbits(f));
1695        return;
1696      }
1697      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1698        // Punt to vmov.i64
1699        double d = neon_imm.GetImmediate<double>();
1700        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1701        vmov(cond, I64, rd, DoubleToRawbits(d));
1702        return;
1703      }
1704    }
1705  }
1706  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1707}
1708
1709
1710void MacroAssembler::Delegate(InstructionType type,
1711                              InstructionCondSizeRMop instruction,
1712                              Condition cond,
1713                              EncodingSize size,
1714                              Register rd,
1715                              const MemOperand& operand) {
1716  // ldr ldrb ldrh ldrsb ldrsh str strb strh
1717  CONTEXT_SCOPE;
1718  VIXL_ASSERT(size.IsBest());
1719  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
1720              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
1721              (type == kStrb) || (type == kStrh));
1722  if (operand.IsImmediate()) {
1723    const Register& rn = operand.GetBaseRegister();
1724    AddrMode addrmode = operand.GetAddrMode();
1725    int32_t offset = operand.GetOffsetImmediate();
1726    uint32_t mask = 0;
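        // 'mask' covers the offset bits the load/store encoding can absorb
        // directly. In A32, ldr/str/ldrb/strb take a 12-bit immediate in any
        // addressing mode, while the halfword and signed-byte forms only take
        // 8 bits. In T32, all of these take 12 bits in Offset mode but only
        // 8 bits with pre- or post-indexing.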
1727    switch (type) {
1728      case kLdr:
1729      case kLdrb:
1730      case kStr:
1731      case kStrb:
1732        if (IsUsingA32() || (addrmode == Offset)) {
1733          mask = 0xfff;
1734        } else {
1735          mask = 0xff;
1736        }
1737        break;
1738      case kLdrsb:
1739      case kLdrh:
1740      case kLdrsh:
1741      case kStrh:
1742        if (IsUsingT32() && (addrmode == Offset)) {
1743          mask = 0xfff;
1744        } else {
1745          mask = 0xff;
1746        }
1747        break;
1748      default:
1749        VIXL_UNREACHABLE();
1750        return;
1751    }
1752    bool negative;
1753    // Try to maximize the offset used by the MemOperand (load_store_offset).
1754    // Add or subtract the part which can't be used by the MemOperand
1755    // (add_sub_offset).
1756    int32_t add_sub_offset;
1757    int32_t load_store_offset;
1758    load_store_offset = offset & mask;
1759    if (offset >= 0) {
1760      negative = false;
1761      add_sub_offset = offset & ~mask;
1762    } else {
1763      negative = true;
1764      add_sub_offset = -offset & ~mask;
1765      if (load_store_offset > 0) add_sub_offset += mask + 1;
1766    }
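        // For example, with mask == 0xfff and offset == -0x1234, this gives
        // load_store_offset == 0xdcc and add_sub_offset == 0x2000, so the
        // access ends up at rn - 0x2000 + 0xdcc == rn - 0x1234.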
1767    switch (addrmode) {
1768      case PreIndex:
1769        // Avoid the unpredictable case 'str r0, [r0, imm]!'
1770        if (!rn.Is(rd)) {
1771          // Pre-Indexed case:
1772          // ldr r0, [r1, 12345]! will translate into
1773          //   add r1, r1, 12345
1774          //   ldr r0, [r1]
1775          {
1776            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1777            if (negative) {
1778              sub(cond, rn, rn, add_sub_offset);
1779            } else {
1780              add(cond, rn, rn, add_sub_offset);
1781            }
1782          }
1783          {
1784            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1785            (this->*instruction)(cond,
1786                                 size,
1787                                 rd,
1788                                 MemOperand(rn, load_store_offset, PreIndex));
1789          }
1790          return;
1791        }
1792        break;
1793      case Offset: {
1794        UseScratchRegisterScope temps(this);
1795        // Allow using the destination as a scratch register if possible.
1796        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1797            !rd.Is(rn)) {
1798          temps.Include(rd);
1799        }
1800        Register scratch = temps.Acquire();
1801        // Offset case:
1802        // ldr r0, [r1, 12345] will translate into
1803        //   add r0, r1, 12345
1804        //   ldr r0, [r0]
1805        {
1806          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1807          if (negative) {
1808            sub(cond, scratch, rn, add_sub_offset);
1809          } else {
1810            add(cond, scratch, rn, add_sub_offset);
1811          }
1812        }
1813        {
1814          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1815          (this->*instruction)(cond,
1816                               size,
1817                               rd,
1818                               MemOperand(scratch, load_store_offset));
1819        }
1820        return;
1821      }
1822      case PostIndex:
1823        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1824        if (!rn.Is(rd)) {
1825          // Post-indexed case:
1826          // ldr r0, [r1], imm32 will translate into
1827          //   ldr r0, [r1]
1828          //   movw ip, imm32 & 0xffff
1829          //   movt ip, imm32 >> 16
1830          //   add r1, r1, ip
1831          {
1832            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1833            (this->*instruction)(cond,
1834                                 size,
1835                                 rd,
1836                                 MemOperand(rn, load_store_offset, PostIndex));
1837          }
1838          {
1839            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1840            if (negative) {
1841              sub(cond, rn, rn, add_sub_offset);
1842            } else {
1843              add(cond, rn, rn, add_sub_offset);
1844            }
1845          }
1846          return;
1847        }
1848        break;
1849    }
1850  } else if (operand.IsPlainRegister()) {
1851    const Register& rn = operand.GetBaseRegister();
1852    AddrMode addrmode = operand.GetAddrMode();
1853    const Register& rm = operand.GetOffsetRegister();
1854    if (rm.IsPC()) {
1855      VIXL_ABORT_WITH_MSG(
1856          "The MacroAssembler does not convert loads and stores with a PC "
1857          "offset register.\n");
1858    }
1859    if (rn.IsPC() && addrmode != Offset) {
1860      VIXL_ABORT_WITH_MSG(
1861          "The MacroAssembler does not convert loads and stores with a PC "
1862          "base register in pre-index or post-index mode.\n");
1863    }
1864    switch (addrmode) {
1865      case PreIndex:
1866        // Avoid the unpredictable case 'str r0, [r0, r1]!'
1867        if (!rn.Is(rd)) {
1868          // Pre-Indexed case:
1869          // ldr r0, [r1, r2]! will translate into
1870          //   add r1, r1, r2
1871          //   ldr r0, [r1]
1872          {
1873            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1874            if (operand.GetSign().IsPlus()) {
1875              add(cond, rn, rn, rm);
1876            } else {
1877              sub(cond, rn, rn, rm);
1878            }
1879          }
1880          {
1881            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1882            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1883          }
1884          return;
1885        }
1886        break;
1887      case Offset: {
1888        UseScratchRegisterScope temps(this);
1889        // Allow using the destination as a scratch register if this is not a
1890        // store.
1891        // Avoid using PC as a temporary as this has side-effects.
1892        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1893            !rd.IsPC()) {
1894          temps.Include(rd);
1895        }
1896        Register scratch = temps.Acquire();
1897        // Offset case:
1898        // ldr r0, [r1, r2] will translate into
1899        //   add r0, r1, r2
1900        //   ldr r0, [r0]
1901        {
1902          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1903          if (operand.GetSign().IsPlus()) {
1904            add(cond, scratch, rn, rm);
1905          } else {
1906            sub(cond, scratch, rn, rm);
1907          }
1908        }
1909        {
1910          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1911          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1912        }
1913        return;
1914      }
1915      case PostIndex:
1916        // Avoid the unpredictable case 'ldr r0, [r0], r1'
1917        if (!rn.Is(rd)) {
1918          // Post-indexed case:
1919          // ldr r0, [r1], r2 will translate into
1920          //   ldr r0, [r1]
1921          //   add r1, r1, r2
1922          {
1923            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1924            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1925          }
1926          {
1927            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1928            if (operand.GetSign().IsPlus()) {
1929              add(cond, rn, rn, rm);
1930            } else {
1931              sub(cond, rn, rn, rm);
1932            }
1933          }
1934          return;
1935        }
1936        break;
1937    }
1938  }
1939  Assembler::Delegate(type, instruction, cond, size, rd, operand);
1940}
1941
1942
1943void MacroAssembler::Delegate(InstructionType type,
1944                              InstructionCondRRMop instruction,
1945                              Condition cond,
1946                              Register rt,
1947                              Register rt2,
1948                              const MemOperand& operand) {
1949  // ldaexd, ldrd, ldrexd, stlex, stlexb, stlexh, strd, strex, strexb, strexh
1950
1951  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
1952      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
1953      (type == kStrexb) || (type == kStrexh)) {
1954    UnimplementedDelegate(type);
1955    return;
1956  }
1957
1958  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
1959
1960  CONTEXT_SCOPE;
1961
1962  // TODO: Should we allow these cases?
1963  if (IsUsingA32()) {
1964    // The first register needs to be even.
1965    if ((rt.GetCode() & 1) != 0) {
1966      UnimplementedDelegate(type);
1967      return;
1968    }
1969    // Registers need to be adjacent.
1970    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
1971      UnimplementedDelegate(type);
1972      return;
1973    }
1974    // LDRD lr, pc, [...] is not allowed.
1975    if (rt.Is(lr)) {
1976      UnimplementedDelegate(type);
1977      return;
1978    }
1979  }
1980
1981  if (operand.IsImmediate()) {
1982    const Register& rn = operand.GetBaseRegister();
1983    AddrMode addrmode = operand.GetAddrMode();
1984    int32_t offset = operand.GetOffsetImmediate();
1985    switch (addrmode) {
1986      case PreIndex: {
1987        // Allow using the destinations as scratch registers if possible.
1988        UseScratchRegisterScope temps(this);
1989        if (type == kLdrd) {
1990          if (!rt.Is(rn)) temps.Include(rt);
1991          if (!rt2.Is(rn)) temps.Include(rt2);
1992        }
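            // The 'add' below may need a scratch register to materialize a
            // large offset; for a load, the destinations can safely be
            // clobbered before the access.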
1993
1994        // Pre-Indexed case:
1995        // ldrd r0, r1, [r2, 12345]! will translate into
1996        //   add r2, 12345
1997        //   ldrd r0, r1, [r2]
1998        {
1999          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2000          add(cond, rn, rn, offset);
2001        }
2002        {
2003          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2004          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2005        }
2006        return;
2007      }
2008      case Offset: {
2009        UseScratchRegisterScope temps(this);
2010        // Allow using the destinations as scratch registers if possible.
2011        if (type == kLdrd) {
2012          if (!rt.Is(rn)) temps.Include(rt);
2013          if (!rt2.Is(rn)) temps.Include(rt2);
2014        }
2015        Register scratch = temps.Acquire();
2016        // Offset case:
2017        // ldrd r0, r1, [r2, 12345] will translate into
2018        //   add r0, r2, 12345
2019        //   ldrd r0, r1, [r0]
2020        {
2021          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2022          add(cond, scratch, rn, offset);
2023        }
2024        {
2025          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2026          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2027        }
2028        return;
2029      }
2030      case PostIndex:
2031        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2032        if (!rn.Is(rt) && !rn.Is(rt2)) {
2033          // Post-indexed case:
2034          // ldrd r0, r1, [r2], imm32 will translate into
2035          //   ldrd r0, r1, [r2]
2036          //   movw ip, imm32 & 0xffff
2037          //   movt ip, imm32 >> 16
2038          //   add r2, ip
2039          {
2040            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2041            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2042          }
2043          {
2044            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2045            add(cond, rn, rn, offset);
2046          }
2047          return;
2048        }
2049        break;
2050    }
2051  }
2052  if (operand.IsPlainRegister()) {
2053    const Register& rn = operand.GetBaseRegister();
2054    const Register& rm = operand.GetOffsetRegister();
2055    AddrMode addrmode = operand.GetAddrMode();
2056    switch (addrmode) {
2057      case PreIndex:
2058        // ldrd r0, r1, [r2, r3]! will translate into
2059        //   add r2, r3
2060        //   ldrd r0, r1, [r2]
2061        {
2062          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2063          if (operand.GetSign().IsPlus()) {
2064            add(cond, rn, rn, rm);
2065          } else {
2066            sub(cond, rn, rn, rm);
2067          }
2068        }
2069        {
2070          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2071          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2072        }
2073        return;
2074      case PostIndex:
2075        // ldrd r0, r1, [r2], r3 will translate into
2076        //   ldrd r0, r1, [r2]
2077        //   add r2, r3
2078        {
2079          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2080          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2081        }
2082        {
2083          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2084          if (operand.GetSign().IsPlus()) {
2085            add(cond, rn, rn, rm);
2086          } else {
2087            sub(cond, rn, rn, rm);
2088          }
2089        }
2090        return;
2091      case Offset: {
2092        UseScratchRegisterScope temps(this);
2093        // Allow using the destinations as scratch registers if possible.
2094        if (type == kLdrd) {
2095          if (!rt.Is(rn)) temps.Include(rt);
2096          if (!rt2.Is(rn)) temps.Include(rt2);
2097        }
2098        Register scratch = temps.Acquire();
2099        // Offset case:
2100        // ldrd r0, r1, [r2, r3] will translate into
2101        //   add r0, r2, r3
2102        //   ldrd r0, r1, [r0]
2103        {
2104          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2105          if (operand.GetSign().IsPlus()) {
2106            add(cond, scratch, rn, rm);
2107          } else {
2108            sub(cond, scratch, rn, rm);
2109          }
2110        }
2111        {
2112          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2113          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2114        }
2115        return;
2116      }
2117    }
2118  }
2119  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2120}
2121
2122
2123void MacroAssembler::Delegate(InstructionType type,
2124                              InstructionCondDtSMop instruction,
2125                              Condition cond,
2126                              DataType dt,
2127                              SRegister rd,
2128                              const MemOperand& operand) {
2129  // vldr.32 vstr.32
2130  CONTEXT_SCOPE;
2131  if (operand.IsImmediate()) {
2132    const Register& rn = operand.GetBaseRegister();
2133    AddrMode addrmode = operand.GetAddrMode();
2134    int32_t offset = operand.GetOffsetImmediate();
2135    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2136                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2137    if (rn.IsPC()) {
2138      VIXL_ABORT_WITH_MSG(
2139          "The MacroAssembler does not convert vldr or vstr with a PC base "
2140          "register.\n");
2141    }
2142    switch (addrmode) {
2143      case PreIndex:
2144        // Pre-Indexed case:
2145        // vldr.32 s0, [r1, 12345]! will translate into
2146        //   add r1, 12345
2147        //   vldr.32 s0, [r1]
2148        if (offset != 0) {
2149          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2150          add(cond, rn, rn, offset);
2151        }
2152        {
2153          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2154          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2155        }
2156        return;
2157      case Offset: {
2158        UseScratchRegisterScope temps(this);
2159        Register scratch = temps.Acquire();
2160        // Offset case:
2161        // vldr.32 s0, [r1, 12345] will translate into
2162        //   add ip, r1, 12345
2163        //   vldr.32 s0, [ip]
2164        {
2165          VIXL_ASSERT(offset != 0);
2166          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2167          add(cond, scratch, rn, offset);
2168        }
2169        {
2170          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2171          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2172        }
2173        return;
2174      }
2175      case PostIndex:
2176        // Post-indexed case:
2177        // vldr.32 s0, [r1], imm32 will translate into
2178        //   vldr.32 s0, [r1]
2179        //   movw ip, imm32 & 0xffff
2180        //   movt ip, imm32 >> 16
2181        //   add r1, ip
2182        {
2183          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2184          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2185        }
2186        if (offset != 0) {
2187          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2188          add(cond, rn, rn, offset);
2189        }
2190        return;
2191    }
2192  }
2193  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2194}
2195
2196
2197void MacroAssembler::Delegate(InstructionType type,
2198                              InstructionCondDtDMop instruction,
2199                              Condition cond,
2200                              DataType dt,
2201                              DRegister rd,
2202                              const MemOperand& operand) {
2203  // vldr.64 vstr.64
2204  CONTEXT_SCOPE;
2205  if (operand.IsImmediate()) {
2206    const Register& rn = operand.GetBaseRegister();
2207    AddrMode addrmode = operand.GetAddrMode();
2208    int32_t offset = operand.GetOffsetImmediate();
2209    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2210                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2211    if (rn.IsPC()) {
2212      VIXL_ABORT_WITH_MSG(
2213          "The MacroAssembler does not convert vldr or vstr with a PC base "
2214          "register.\n");
2215    }
2216    switch (addrmode) {
2217      case PreIndex:
2218        // Pre-Indexed case:
2219        // vldr.64 d0, [r1, 12345]! will translate into
2220        //   add r1, 12345
2221        //   vldr.64 d0, [r1]
2222        if (offset != 0) {
2223          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2224          add(cond, rn, rn, offset);
2225        }
2226        {
2227          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2228          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2229        }
2230        return;
2231      case Offset: {
2232        UseScratchRegisterScope temps(this);
2233        Register scratch = temps.Acquire();
2234        // Offset case:
2235        // vldr.64 d0, [r1, 12345] will translate into
2236        //   add ip, r1, 12345
2237        //   vldr.64 d0, [ip]
2238        {
2239          VIXL_ASSERT(offset != 0);
2240          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2241          add(cond, scratch, rn, offset);
2242        }
2243        {
2244          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2245          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2246        }
2247        return;
2248      }
2249      case PostIndex:
2250        // Post-indexed case:
2251        // vldr.64 d0, [r1], imm32 will translate into
2252        //   vldr.64 d0, [r1]
2253        //   movw ip, imm32 & 0xffff
2254        //   movt ip, imm32 >> 16
2255        //   add r1, ip
2256        {
2257          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2258          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2259        }
2260        if (offset != 0) {
2261          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2262          add(cond, rn, rn, offset);
2263        }
2264        return;
2265    }
2266  }
2267  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2268}
2269
2270
2271void MacroAssembler::Delegate(InstructionType type,
2272                              InstructionCondMsrOp instruction,
2273                              Condition cond,
2274                              MaskedSpecialRegister spec_reg,
2275                              const Operand& operand) {
2276  USE(type);
2277  VIXL_ASSERT(type == kMsr);
2278  if (operand.IsImmediate()) {
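        // Materialize the immediate in a scratch register, then use the
        // register form of msr.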
2279    UseScratchRegisterScope temps(this);
2280    Register scratch = temps.Acquire();
2281    {
2282      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2283      mov(cond, scratch, operand);
2284    }
2285    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2286    msr(cond, spec_reg, scratch);
2287    return;
2288  }
2289  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2290}
2291
2292#undef CONTEXT_SCOPE
2293#undef TOSTRING
2294#undef STRINGIFY
2295
2296// Start of generated code.
2297// End of generated code.
2298}  // namespace aarch32
2299}  // namespace vixl
2300