macro-assembler-aarch32.cc revision b21b7623a9fb6d4c627aec1e3a0b51f278ef0a72
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
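// Typical usage of this scope, mirroring how it is used later in this file
// (a sketch, not taken verbatim from the VIXL documentation):
//   UseScratchRegisterScope temps(&masm);
//   Register scratch = temps.Acquire();
//   ...  // `scratch` may be clobbered freely here.
//   // When `temps` goes out of scope, the MacroAssembler's original scratch
//   // register lists are restored.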
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT((available_ == NULL) && (available_vfp_ == NULL));
41  available_ = masm->GetScratchRegisterList();
42  old_available_ = available_->GetList();
43  available_vfp_ = masm->GetScratchVRegisterList();
44  old_available_vfp_ = available_vfp_->GetList();
45}
46
47
48void UseScratchRegisterScope::Close() {
49  if (available_ != NULL) {
50    available_->SetList(old_available_);
51    available_ = NULL;
52  }
53  if (available_vfp_ != NULL) {
54    available_vfp_->SetList(old_available_vfp_);
55    available_vfp_ = NULL;
56  }
57}
58
59
60bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
61  VIXL_ASSERT(available_ != NULL);
62  VIXL_ASSERT(reg.IsValid());
63  return available_->Includes(reg);
64}
65
66
67bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
68  VIXL_ASSERT(available_vfp_ != NULL);
69  VIXL_ASSERT(reg.IsValid());
70  return available_vfp_->IncludesAllOf(reg);
71}
72
73
74Register UseScratchRegisterScope::Acquire() {
75  VIXL_ASSERT(available_ != NULL);
76  VIXL_CHECK(!available_->IsEmpty());
77  Register reg = available_->GetFirstAvailableRegister();
78  available_->Remove(reg);
79  return reg;
80}
81
82
83VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
84  switch (size_in_bits) {
85    case kSRegSizeInBits:
86      return AcquireS();
87    case kDRegSizeInBits:
88      return AcquireD();
89    case kQRegSizeInBits:
90      return AcquireQ();
91    default:
92      VIXL_UNREACHABLE();
93      return NoVReg;
94  }
95}
96
97
98QRegister UseScratchRegisterScope::AcquireQ() {
99  VIXL_ASSERT(available_vfp_ != NULL);
100  VIXL_CHECK(!available_vfp_->IsEmpty());
101  QRegister reg = available_vfp_->GetFirstAvailableQRegister();
102  available_vfp_->Remove(reg);
103  return reg;
104}
105
106
107DRegister UseScratchRegisterScope::AcquireD() {
108  VIXL_ASSERT(available_vfp_ != NULL);
109  VIXL_CHECK(!available_vfp_->IsEmpty());
110  DRegister reg = available_vfp_->GetFirstAvailableDRegister();
111  available_vfp_->Remove(reg);
112  return reg;
113}
114
115
116SRegister UseScratchRegisterScope::AcquireS() {
117  VIXL_ASSERT(available_vfp_ != NULL);
118  VIXL_CHECK(!available_vfp_->IsEmpty());
119  SRegister reg = available_vfp_->GetFirstAvailableSRegister();
120  available_vfp_->Remove(reg);
121  return reg;
122}
123
124
125void UseScratchRegisterScope::Release(const Register& reg) {
126  VIXL_ASSERT(available_ != NULL);
127  VIXL_ASSERT(reg.IsValid());
128  VIXL_ASSERT(!available_->Includes(reg));
129  available_->Combine(reg);
130}
131
132
133void UseScratchRegisterScope::Release(const VRegister& reg) {
134  VIXL_ASSERT(available_vfp_ != NULL);
135  VIXL_ASSERT(reg.IsValid());
136  VIXL_ASSERT(!available_vfp_->IncludesAliasOf(reg));
137  available_vfp_->Combine(reg);
138}
139
140
141void UseScratchRegisterScope::Include(const RegisterList& list) {
142  VIXL_ASSERT(available_ != NULL);
143  RegisterList excluded_registers(sp, lr, pc);
144  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
145  available_->SetList(available_->GetList() | mask);
146}
147
148
149void UseScratchRegisterScope::Include(const VRegisterList& list) {
150  VIXL_ASSERT(available_vfp_ != NULL);
151  available_vfp_->SetList(available_vfp_->GetList() | list.GetList());
152}
153
154
155void UseScratchRegisterScope::Exclude(const RegisterList& list) {
156  VIXL_ASSERT(available_ != NULL);
157  available_->SetList(available_->GetList() & ~list.GetList());
158}
159
160
161void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
162  VIXL_ASSERT(available_vfp_ != NULL);
163  available_vfp_->SetList(available_vfp_->GetList() & ~list.GetList());
164}
165
166
167void UseScratchRegisterScope::ExcludeAll() {
168  if (available_ != NULL) {
169    available_->SetList(0);
170  }
171  if (available_vfp_ != NULL) {
172    available_vfp_->SetList(0);
173  }
174}
175
176
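// A note on the terminology used below: a label's "checkpoint" is the last
// buffer offset by which all branches referring to it are still in range;
// `checkpoint_` tracks the minimum checkpoint over the labels held by this
// pool manager, so the MacroAssembler knows by when veneers must be emitted.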
177void VeneerPoolManager::AddLabel(Label* label) {
178  if (!label->IsInVeneerPool()) {
179    label->SetVeneerPoolManager(this);
180    labels_.push_back(label);
181  }
182  Label::ForwardReference& back = label->GetBackForwardRef();
183  back.SetIsBranch();
184  label->UpdateCheckpoint();
185  Label::Offset tmp = label->GetCheckpoint();
186  if (checkpoint_ > tmp) {
187    checkpoint_ = tmp;
188    masm_->ComputeCheckpoint();
189  }
190}
191
192
193void VeneerPoolManager::RemoveLabel(Label* label) {
194  label->ClearVeneerPoolManager();
195  if (label->GetCheckpoint() == checkpoint_) {
196    // We have to recompute the checkpoint.
197    checkpoint_ = Label::kMaxOffset;
198    for (std::list<Label*>::iterator it = labels_.begin();
199         it != labels_.end();) {
200      if (*it == label) {
201        it = labels_.erase(it);
202      } else {
203        checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
204        ++it;
205      }
206    }
207    masm_->ComputeCheckpoint();
208  } else {
209    // We only have to remove the label from the list.
210    for (std::list<Label*>::iterator it = labels_.begin();; ++it) {
211      VIXL_ASSERT(it != labels_.end());
212      if (*it == label) {
213        labels_.erase(it);
214        break;
215      }
216    }
217  }
218}
219
220
221void VeneerPoolManager::Emit(Label::Offset target) {
222  checkpoint_ = Label::kMaxOffset;
223  // Sort the labels by their checkpoints to avoid a veneer going out of
224  // range.
225  labels_.sort(Label::CompareLabels);
226  // To avoid too many veneers, generate veneers which will be necessary soon.
227  static const size_t kVeneerEmissionMargin = 1 * KBytes;
228  // To avoid generating too many veneers, redirect uses which are not too
229  // far away to the veneers emitted here as well.
230  static const size_t kVeneerEmittedMargin = 2 * KBytes;
231  Label::Offset emitted_target = target + kVeneerEmittedMargin;
232  target += kVeneerEmissionMargin;
233  // Reset the checkpoint. It will be computed again in the loop.
234  checkpoint_ = Label::kMaxOffset;
235  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();) {
236    // The labels are sorted. As soon as a veneer is not needed, we can stop.
237    if ((*it)->GetCheckpoint() > target) {
238      checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
239      break;
240    }
241    // Define the veneer.
242    Label veneer;
243    masm_->Bind(&veneer);
244    Label::Offset label_checkpoint = Label::kMaxOffset;
245    // Check all uses of this label.
246    for (Label::ForwardRefList::iterator ref = (*it)->GetFirstForwardRef();
247         ref != (*it)->GetEndForwardRef();) {
248      if (ref->IsBranch()) {
249        if (ref->GetCheckpoint() <= emitted_target) {
250          // Use the veneer.
251          masm_->EncodeLabelFor(*ref, &veneer);
252          ref = (*it)->Erase(ref);
253        } else {
254          // Don't use the veneer => update checkpoint.
255          label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
256          ++ref;
257        }
258      } else {
259        ++ref;
260      }
261    }
262    // Even if this label no longer has any uses, we can keep it in the list
263    // as the next "B" to it would add it back.
264    (*it)->SetCheckpoint(label_checkpoint);
265    checkpoint_ = std::min(checkpoint_, label_checkpoint);
266    // Generate the veneer.
267    masm_->B(*it);
268    ++it;
269  }
270#ifdef VIXL_DEBUG
271  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();
272       ++it) {
273    VIXL_ASSERT((*it)->GetCheckpoint() >= checkpoint_);
274  }
275#endif
276  masm_->ComputeCheckpoint();
277}
278
279
280void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
281  EmitOption option = kBranchRequired;
282  Label after_pools;
283  if (target > veneer_pool_manager_.GetCheckpoint()) {
284#ifdef VIXL_DEBUG
285    // Here, we can't use an AssemblerAccurateScope as it would call
286    // PerformEnsureEmit again, resulting in an infinite loop.
287    bool save_assembler_state = AllowAssembler();
288    SetAllowAssembler(true);
289#endif
290    GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
291    b(&after_pools);
292#ifdef VIXL_DEBUG
293    SetAllowAssembler(false);
294#endif
295    veneer_pool_manager_.Emit(target);
296    option = kNoBranchRequired;
297#ifdef VIXL_DEBUG
298    SetAllowAssembler(save_assembler_state);
299#endif
300  }
301  // Check if the macro-assembler's internal literal pool should be emitted
302  // to avoid any overflow. If we already generated the veneers, we can
303  // emit the pool (the branch is already done).
304  VIXL_ASSERT(GetCursorOffset() <= literal_pool_manager_.GetCheckpoint());
305  if ((target > literal_pool_manager_.GetCheckpoint()) ||
306      (option == kNoBranchRequired)) {
307    // We will generate the literal pool. Generate all the veneers which
308    // would become out of range.
309    size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize();
310    VIXL_ASSERT(IsInt32(literal_pool_size));
311    Label::Offset veneers_target =
312        target + static_cast<Label::Offset>(literal_pool_size);
313    VIXL_ASSERT(veneers_target >= 0);
314    if (veneers_target >= veneer_pool_manager_.GetCheckpoint()) {
315      veneer_pool_manager_.Emit(veneers_target);
316    }
317    EmitLiteralPool(option);
318  }
319  BindHelper(&after_pools);
320  if (GetBuffer()->IsManaged()) {
321    bool grow_requested;
322    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
323    if (grow_requested) ComputeCheckpoint();
324  }
325}
326
327
328void MacroAssembler::ComputeCheckpoint() {
329  checkpoint_ = veneer_pool_manager_.GetCheckpoint();
330  if (literal_pool_manager_.GetCheckpoint() != Label::kMaxOffset) {
331    size_t veneer_max_size = veneer_pool_manager_.GetMaxSize();
332    VIXL_ASSERT(IsInt32(veneer_max_size));
333    // We must be able to generate the pool and a branch over the pool.
334    Label::Offset tmp = literal_pool_manager_.GetCheckpoint() -
335                        static_cast<Label::Offset>(veneer_max_size +
336                                                   kMaxInstructionSizeInBytes);
337    VIXL_ASSERT(tmp >= 0);
338    checkpoint_ = std::min(checkpoint_, tmp);
339  }
340  size_t buffer_size = GetBuffer()->GetCapacity();
341  VIXL_ASSERT(IsInt32(buffer_size));
342  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
343  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
344}
345
346
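// Usage sketch for the switch/case helpers defined below. The concrete
// JumpTable type shown here is an assumption for illustration, not taken
// from this file:
//   JumpTable<int32_t> table(kNumCases);  // hypothetical offset type/length
//   masm.Switch(r0, &table);
//   masm.Case(&table, 0);
//   ...                                   // code for case 0
//   masm.Break(&table);
//   masm.Default(&table);
//   ...                                   // default path
//   masm.EndSwitch(&table);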
347void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
348  // 32-bit table A32:
349  // adr ip, table
350  // add ip, ip, r1, lsl #2
351  // ldr ip, [ip]
352  // jmp: add pc, pc, ip, lsl 2
353  // table:
354  // .int (case_0 - (jmp + 8)) >> 2
355  // .int (case_1 - (jmp + 8)) >> 2
356  // .int (case_2 - (jmp + 8)) >> 2
357
358  // 16-bit table T32:
359  // adr ip, table
360  // jmp: tbh ip, r1
361  // table:
362  // .short (case_0 - (jmp + 4)) >> 1
363  // .short (case_1 - (jmp + 4)) >> 1
364  // .short (case_2 - (jmp + 4)) >> 1
365  // case_0:
366  //   ...
367  //   b end_switch
368  // case_1:
369  //   ...
370  //   b end_switch
371  // ...
372  // end_switch:
373  Label jump_table;
374  UseScratchRegisterScope temps(this);
375  Register scratch = temps.Acquire();
376  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
377
378  // Jump to the default case if reg is not in [0, table->GetLength()).
379  Cmp(reg, table->GetLength());
380  B(ge, table->GetDefaultLabel());
381
382  Adr(scratch, &jump_table);
383  if (IsUsingA32()) {
384    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
385    switch (table->GetOffsetShift()) {
386      case 0:
387        Ldrb(scratch, MemOperand(scratch));
388        break;
389      case 1:
390        Ldrh(scratch, MemOperand(scratch));
391        break;
392      case 2:
393        Ldr(scratch, MemOperand(scratch));
394        break;
395      default:
396        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
397    }
398    // Emit whatever needs to be emitted so that the position of the branch
399    // instruction is recorded correctly.
400    uint32_t branch_location = GetCursorOffset();
401    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
402    AssemblerAccurateScope scope(this,
403                                 table_size + kA32InstructionSizeInBytes,
404                                 CodeBufferCheckScope::kMaximumSize);
405    add(pc, pc, Operand(scratch, LSL, 2));
406    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
407    bind(&jump_table);
408    GenerateSwitchTable(table, table_size);
409  } else {
410    // Thumb mode - we have tbb and tbh to do this for 8 or 16bit offsets.
411    // But for 32bit offsets, we use the same encoding as for A32.
412    if (table->GetOffsetShift() == 2) {
413      // 32bit offsets
414      Add(scratch, scratch, Operand(reg, LSL, 2));
415      Ldr(scratch, MemOperand(scratch));
416      // We cannot use "add pc, pc, r, lsl #1" as this is unpredictable in
417      // T32, so do the shift beforehand.
418      Lsl(scratch, scratch, 1);
419      // Emit whatever needs to be emitted so that the position of the
420      // branch instruction is recorded correctly.
421      uint32_t branch_location = GetCursorOffset();
422      table->SetBranchLocation(branch_location +
423                               GetArchitectureStatePCOffset());
424      AssemblerAccurateScope scope(this,
425                                   table_size + kMaxInstructionSizeInBytes,
426                                   CodeBufferCheckScope::kMaximumSize);
427      add(pc, pc, scratch);
428      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
429      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
430      bind(&jump_table);
431      GenerateSwitchTable(table, table_size);
432    } else {
433      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
434                  (table->GetOffsetShift() == 1));
435      // Emit whatever needs to be emitted so that the position of the
436      // branch instruction is recorded correctly.
437      uint32_t branch_location = GetCursorOffset();
438      table->SetBranchLocation(branch_location +
439                               GetArchitectureStatePCOffset());
440      AssemblerAccurateScope scope(this,
441                                   table_size + kMaxInstructionSizeInBytes,
442                                   CodeBufferCheckScope::kMaximumSize);
443      if (table->GetOffsetShift() == 0) {
444        // 8bit offsets
445        tbb(scratch, reg);
446      } else {
447        // 16bit offsets
448        tbh(scratch, reg);
449      }
450      // tbb/tbh is a 32bit instruction
451      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
452      bind(&jump_table);
453      GenerateSwitchTable(table, table_size);
454    }
455  }
456}
457
458
459void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
460  table->BindTable(GetCursorOffset());
461  for (int i = 0; i < table_size / 4; i++) {
462    GetBuffer()->Emit32(0);
463  }
464}
465
466
467// switch/case/default : case
468// case_index is assumed to be < table->GetLength()
469// which is checked in JumpTable::Link and Table::SetPresenceBit
470void MacroAssembler::Case(JumpTableBase* table, int case_index) {
471  table->Link(this, case_index, GetCursorOffset());
472  table->SetPresenceBitForCase(case_index);
473}
474
475// switch/case/default : default
476void MacroAssembler::Default(JumpTableBase* table) {
477  Bind(table->GetDefaultLabel());
478}
479
480// switch/case/default : break
481void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
482
483// switch/case/default : finalize
484// Mostly manages the default path: all empty offsets in the jump table
485// will point to the default case.
486// All values not in [0, table->GetLength()) are already pointing here anyway.
487void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
488
489void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
490                                                Register tmp,
491                                                uint32_t imm) {
492  if (IsUintN(16, imm)) {
493    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
494    mov(cond, tmp, imm & 0xffff);
495    return;
496  }
497  if (IsUsingT32()) {
498    if (ImmediateT32::IsImmediateT32(~imm)) {
499      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
500      mvn(cond, tmp, ~imm);
501      return;
502    }
503  } else {
504    if (ImmediateA32::IsImmediateA32(~imm)) {
505      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
506      mvn(cond, tmp, ~imm);
507      return;
508    }
509  }
510  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
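  // No single-instruction encoding was found above; synthesise the constant
  // with a mov/movt pair, e.g. 0x12345678 becomes:
  //   mov  tmp, #0x5678
  //   movt tmp, #0x1234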
511  mov(cond, tmp, imm & 0xffff);
512  movt(cond, tmp, imm >> 16);
513}
514
515
516void MacroAssembler::PadToMinimumBranchRange(Label* label) {
517  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
518  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
519    uint32_t location = last_reference->GetLocation();
520    if (location + k16BitT32InstructionSizeInBytes ==
521        static_cast<uint32_t>(GetCursorOffset())) {
522      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
523      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
524        VIXL_ASSERT(!InITBlock());
525        // A Cbz or a Cbnz cannot branch to the instruction immediately
526        // following it. If the target is right after the Cbz or Cbnz, we
527        // insert a nop to avoid that.
528        EmitT32_16(k16BitT32NopOpcode);
529      }
530    }
531  }
532}
533
534
535HARDFLOAT void PrintfTrampolineRRRR(
536    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
537  printf(format, a, b, c, d);
538}
539
540
541HARDFLOAT void PrintfTrampolineRRRD(
542    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
543  printf(format, a, b, c, d);
544}
545
546
547HARDFLOAT void PrintfTrampolineRRDR(
548    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
549  printf(format, a, b, c, d);
550}
551
552
553HARDFLOAT void PrintfTrampolineRRDD(
554    const char* format, uint32_t a, uint32_t b, double c, double d) {
555  printf(format, a, b, c, d);
556}
557
558
559HARDFLOAT void PrintfTrampolineRDRR(
560    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
561  printf(format, a, b, c, d);
562}
563
564
565HARDFLOAT void PrintfTrampolineRDRD(
566    const char* format, uint32_t a, double b, uint32_t c, double d) {
567  printf(format, a, b, c, d);
568}
569
570
571HARDFLOAT void PrintfTrampolineRDDR(
572    const char* format, uint32_t a, double b, double c, uint32_t d) {
573  printf(format, a, b, c, d);
574}
575
576
577HARDFLOAT void PrintfTrampolineRDDD(
578    const char* format, uint32_t a, double b, double c, double d) {
579  printf(format, a, b, c, d);
580}
581
582
583HARDFLOAT void PrintfTrampolineDRRR(
584    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
585  printf(format, a, b, c, d);
586}
587
588
589HARDFLOAT void PrintfTrampolineDRRD(
590    const char* format, double a, uint32_t b, uint32_t c, double d) {
591  printf(format, a, b, c, d);
592}
593
594
595HARDFLOAT void PrintfTrampolineDRDR(
596    const char* format, double a, uint32_t b, double c, uint32_t d) {
597  printf(format, a, b, c, d);
598}
599
600
601HARDFLOAT void PrintfTrampolineDRDD(
602    const char* format, double a, uint32_t b, double c, double d) {
603  printf(format, a, b, c, d);
604}
605
606
607HARDFLOAT void PrintfTrampolineDDRR(
608    const char* format, double a, double b, uint32_t c, uint32_t d) {
609  printf(format, a, b, c, d);
610}
611
612
613HARDFLOAT void PrintfTrampolineDDRD(
614    const char* format, double a, double b, uint32_t c, double d) {
615  printf(format, a, b, c, d);
616}
617
618
619HARDFLOAT void PrintfTrampolineDDDR(
620    const char* format, double a, double b, double c, uint32_t d) {
621  printf(format, a, b, c, d);
622}
623
624
625HARDFLOAT void PrintfTrampolineDDDD(
626    const char* format, double a, double b, double c, double d) {
627  printf(format, a, b, c, d);
628}
629
630
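// Example call (a sketch; matching each register to its format specifier is
// the caller's responsibility, and whether trailing arguments can be omitted
// depends on defaults declared in the header, which is an assumption here):
//   masm.Printf("x = %d, d = %g\n", r0, d0);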
631void MacroAssembler::Printf(const char* format,
632                            CPURegister reg1,
633                            CPURegister reg2,
634                            CPURegister reg3,
635                            CPURegister reg4) {
636  if (generate_simulator_code_) {
637    PushRegister(reg4);
638    PushRegister(reg3);
639    PushRegister(reg2);
640    PushRegister(reg1);
641    Push(RegisterList(r0, r1));
642    StringLiteral* format_literal =
643        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
644    Adr(r0, format_literal);
645    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
646                    (reg2.GetType() << 4) | reg1.GetType();
647    Mov(r1, args);
648    Hvc(kPrintfCode);
649    Pop(RegisterList(r0, r1));
650    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
651               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
652    Drop(size);
653  } else {
654    // Generating code for a native platform, i.e. a 32-bit environment.
655    // Preserve the core registers r0-r3, r12, r14, plus r5 (used below).
656    const uint32_t saved_registers_mask =
657        kCallerSavedRegistersMask | (1 << r5.GetCode());
658    Push(RegisterList(saved_registers_mask));
659    // Push VFP registers.
660    Vpush(Untyped64, DRegisterList(d0, 8));
661    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
662    // Find a register which has been saved (so it can be clobbered) and
663    // which is not one of the registers to print.
664    RegisterList available_registers(kCallerSavedRegistersMask);
665    if (reg1.GetType() == CPURegister::kRRegister) {
666      available_registers.Remove(Register(reg1.GetCode()));
667    }
668    if (reg2.GetType() == CPURegister::kRRegister) {
669      available_registers.Remove(Register(reg2.GetCode()));
670    }
671    if (reg3.GetType() == CPURegister::kRRegister) {
672      available_registers.Remove(Register(reg3.GetCode()));
673    }
674    if (reg4.GetType() == CPURegister::kRRegister) {
675      available_registers.Remove(Register(reg4.GetCode()));
676    }
677    Register tmp = available_registers.GetFirstAvailableRegister();
678    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
679    // Push the flags.
680    Mrs(tmp, APSR);
681    Push(tmp);
682    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
683    Push(tmp);
684    // Push the registers to print on the stack.
685    PushRegister(reg4);
686    PushRegister(reg3);
687    PushRegister(reg2);
688    PushRegister(reg1);
689    int core_count = 1;
690    int vfp_count = 0;
691    uint32_t printf_type = 0;
692    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
693    // Reg4 may stay on the stack if all the registers to print are core
694    // registers.
695    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
696    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
697    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
698    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
699    // Ensure that the stack is aligned on 8 bytes.
700    And(r5, sp, 0x7);
701    if (core_count == 5) {
702      // One 32-bit argument (reg4) has been left on the stack, so align the
703      // stack before that argument.
705      Pop(r0);
706      Sub(sp, sp, r5);
707      Push(r0);
708    } else {
709      Sub(sp, sp, r5);
710    }
711    // Select the right trampoline depending on the arguments.
712    uintptr_t address;
713    switch (printf_type) {
714      case 0:
715        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
716        break;
717      case 1:
718        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
719        break;
720      case 2:
721        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
722        break;
723      case 3:
724        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
725        break;
726      case 4:
727        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
728        break;
729      case 5:
730        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
731        break;
732      case 6:
733        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
734        break;
735      case 7:
736        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
737        break;
738      case 8:
739        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
740        break;
741      case 9:
742        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
743        break;
744      case 10:
745        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
746        break;
747      case 11:
748        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
749        break;
750      case 12:
751        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
752        break;
753      case 13:
754        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
755        break;
756      case 14:
757        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
758        break;
759      case 15:
760        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
761        break;
762      default:
763        VIXL_UNREACHABLE();
764        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
765        break;
766    }
767    StringLiteral* format_literal =
768        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
769    Adr(r0, format_literal);
770    Mov(ip, Operand::From(address));
771    Blx(ip);
772    // If register reg4 was left on the stack, skip it.
773    if (core_count == 5) Drop(kRegSizeInBytes);
774    // Restore the stack as it was before alignment.
775    Add(sp, sp, r5);
776    // Restore the flags.
777    Pop(tmp);
778    Vmsr(FPSCR, tmp);
779    Pop(tmp);
780    Msr(APSR_nzcvqg, tmp);
781    // Restore the registers.
782    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
783    Vpop(Untyped64, DRegisterList(d0, 8));
784    Pop(RegisterList(saved_registers_mask));
785  }
786}
787
788
789void MacroAssembler::PushRegister(CPURegister reg) {
790  switch (reg.GetType()) {
791    case CPURegister::kNoRegister:
792      break;
793    case CPURegister::kRRegister:
794      Push(Register(reg.GetCode()));
795      break;
796    case CPURegister::kSRegister:
797      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
798      break;
799    case CPURegister::kDRegister:
800      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
801      break;
802    case CPURegister::kQRegister:
803      VIXL_UNIMPLEMENTED();
804      break;
805  }
806}
807
808
809void MacroAssembler::PreparePrintfArgument(CPURegister reg,
810                                           int* core_count,
811                                           int* vfp_count,
812                                           uint32_t* printf_type) {
813  switch (reg.GetType()) {
814    case CPURegister::kNoRegister:
815      break;
816    case CPURegister::kRRegister:
817      VIXL_ASSERT(*core_count <= 4);
818      if (*core_count < 4) Pop(Register(*core_count));
819      *core_count += 1;
820      break;
821    case CPURegister::kSRegister:
822      VIXL_ASSERT(*vfp_count < 4);
823      *printf_type |= 1 << (*core_count + *vfp_count - 1);
824      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
825      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
826      *vfp_count += 1;
827      break;
828    case CPURegister::kDRegister:
829      VIXL_ASSERT(*vfp_count < 4);
830      *printf_type |= 1 << (*core_count + *vfp_count - 1);
831      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
832      *vfp_count += 1;
833      break;
834    case CPURegister::kQRegister:
835      VIXL_UNIMPLEMENTED();
836      break;
837  }
838}
839
840
841void MacroAssembler::Delegate(InstructionType type,
842                              InstructionCondROp instruction,
843                              Condition cond,
844                              Register rn,
845                              const Operand& operand) {
846  // movt, sxtb16, teq, uxtb16
847  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
848              (type == kUxtb16));
849
850  if (type == kMovt) {
851    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.");
852  }
853
854  // This delegate only supports teq with immediates.
855  CONTEXT_SCOPE;
856  if ((type == kTeq) && operand.IsImmediate()) {
857    UseScratchRegisterScope temps(this);
858    Register scratch = temps.Acquire();
859    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
860    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
861    teq(cond, rn, scratch);
862    return;
863  }
864  Assembler::Delegate(type, instruction, cond, rn, operand);
865}
866
867
868void MacroAssembler::Delegate(InstructionType type,
869                              InstructionCondSizeROp instruction,
870                              Condition cond,
871                              EncodingSize size,
872                              Register rn,
873                              const Operand& operand) {
874  // cmn cmp mov movs mvn mvns sxtb sxth tst uxtb uxth
875  CONTEXT_SCOPE;
876  VIXL_ASSERT(size.IsBest());
877  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
878              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
879              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
880              (type == kUxtb) || (type == kUxth));
881  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
882    VIXL_ASSERT((type != kMov) || (type != kMovs));
883    InstructionCondRROp shiftop = NULL;
884    switch (operand.GetShift().GetType()) {
885      case LSL:
886        shiftop = &Assembler::lsl;
887        break;
888      case LSR:
889        shiftop = &Assembler::lsr;
890        break;
891      case ASR:
892        shiftop = &Assembler::asr;
893        break;
894      case RRX:
895        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
896        VIXL_UNREACHABLE();
897        break;
898      case ROR:
899        shiftop = &Assembler::ror;
900        break;
901      default:
902        VIXL_UNREACHABLE();
903    }
904    if (shiftop != NULL) {
905      UseScratchRegisterScope temps(this);
906      Register scratch = temps.Acquire();
907      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
908      (this->*shiftop)(cond,
909                       scratch,
910                       operand.GetBaseRegister(),
911                       operand.GetShiftRegister());
912      (this->*instruction)(cond, size, rn, scratch);
913      return;
914    }
915  }
916  if (operand.IsImmediate()) {
917    uint32_t imm = operand.GetImmediate();
918    switch (type) {
919      case kMov:
920      case kMovs:
921        if (!rn.IsPC()) {
922          // Immediate is too large, but not using PC, so handle with mov{t}.
923          HandleOutOfBoundsImmediate(cond, rn, imm);
924          if (type == kMovs) {
925            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
926            tst(cond, rn, rn);
927          }
928          return;
929        } else if (type == kMov) {
930          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
931          // Immediate is too large and using PC, so handle using a temporary
932          // register.
933          UseScratchRegisterScope temps(this);
934          Register scratch = temps.Acquire();
935          HandleOutOfBoundsImmediate(al, scratch, imm);
936          EnsureEmitFor(kMaxInstructionSizeInBytes);
937          bx(cond, scratch);
938          return;
939        }
940        break;
941      case kCmn:
942      case kCmp:
943        if (IsUsingA32() || !rn.IsPC()) {
944          UseScratchRegisterScope temps(this);
945          Register scratch = temps.Acquire();
946          HandleOutOfBoundsImmediate(cond, scratch, imm);
947          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
948          (this->*instruction)(cond, size, rn, scratch);
949          return;
950        }
951        break;
952      case kMvn:
953      case kMvns:
954        if (!rn.IsPC()) {
955          UseScratchRegisterScope temps(this);
956          Register scratch = temps.Acquire();
957          HandleOutOfBoundsImmediate(cond, scratch, imm);
958          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
959          (this->*instruction)(cond, size, rn, scratch);
960          return;
961        }
962        break;
963      case kTst:
964        if (IsUsingA32() || !rn.IsPC()) {
965          UseScratchRegisterScope temps(this);
966          Register scratch = temps.Acquire();
967          HandleOutOfBoundsImmediate(cond, scratch, imm);
968          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
969          (this->*instruction)(cond, size, rn, scratch);
970          return;
971        }
972        break;
973      default:  // kSxtb, Sxth, Uxtb, Uxth
974        break;
975    }
976  }
977  Assembler::Delegate(type, instruction, cond, size, rn, operand);
978}
979
980
981void MacroAssembler::Delegate(InstructionType type,
982                              InstructionCondRROp instruction,
983                              Condition cond,
984                              Register rd,
985                              Register rn,
986                              const Operand& operand) {
987  // orn orns pkhbt pkhtb rsc rscs sxtab sxtab16 sxtah uxtab uxtab16 uxtah
988
989  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
990      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
991      (type == kPkhbt) || (type == kPkhtb)) {
992    UnimplementedDelegate(type);
993    return;
994  }
995
996  // This delegate only handles the following instructions.
997  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
998              (type == kRscs));
999  CONTEXT_SCOPE;
1000
1001  // T32 does not support register-shifted-register operands, so emulate them.
1002  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1003    InstructionCondRROp shiftop = NULL;
1004    switch (operand.GetShift().GetType()) {
1005      case LSL:
1006        shiftop = &Assembler::lsl;
1007        break;
1008      case LSR:
1009        shiftop = &Assembler::lsr;
1010        break;
1011      case ASR:
1012        shiftop = &Assembler::asr;
1013        break;
1014      case RRX:
1015        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1016        VIXL_UNREACHABLE();
1017        break;
1018      case ROR:
1019        shiftop = &Assembler::ror;
1020        break;
1021      default:
1022        VIXL_UNREACHABLE();
1023    }
1024    if (shiftop != NULL) {
1025      UseScratchRegisterScope temps(this);
1026      Register rm = operand.GetBaseRegister();
1027      Register rs = operand.GetShiftRegister();
1028      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1029      // a scratch register.
1030      if (!rd.Is(rn)) temps.Include(rd);
1031      if (!rm.Is(rn)) temps.Include(rm);
1032      if (!rs.Is(rn)) temps.Include(rs);
1033      Register scratch = temps.Acquire();
1034      // TODO: The scope length was measured empirically. We should analyse
1035      // the worst-case size and add targeted tests.
1036      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1037      (this->*shiftop)(cond, scratch, rm, rs);
1038      (this->*instruction)(cond, rd, rn, scratch);
1039      return;
1040    }
1041  }
1042
1043  // T32 does not have a Rsc instruction, so invert the lhs input and turn it
1044  // into an Adc. Adc and Rsc are equivalent using a bitwise NOT:
1045  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
1046  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1047    // The RegisterShiftRegister case should have been handled above.
1048    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1049    UseScratchRegisterScope temps(this);
1050    Register negated_rn;
1051    if (operand.IsImmediate() || !operand.GetBaseRegister().Is(rn)) {
1052      // In this case, we can just invert `rn` in place instead of using a
1053      // temporary register.
1054      negated_rn = rn;
1055    } else {
1056      if (!rd.Is(rn)) temps.Include(rd);
1057      negated_rn = temps.Acquire();
1058    }
1059    {
1060      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1061      mvn(cond, negated_rn, rn);
1062    }
1063    if (type == kRsc) {
1064      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1065      adc(cond, rd, negated_rn, operand);
1066      return;
1067    }
1068    // TODO: We shouldn't have to specify how much space the next instruction
1069    // needs.
1070    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1071    adcs(cond, rd, negated_rn, operand);
1072    return;
1073  }
1074
1075  // A32 does not have an Orn instruction, so invert the rhs input and turn it
1076  // into an Orr.
1077  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1078    // TODO: orn r0, r1, imm -> orr r0, r1, ~imm when the inverse is encodable.
1079    //  mvn r0, r2
1080    //  orr r0, r1, r0
1081    Register scratch;
1082    UseScratchRegisterScope temps(this);
1083    // If they differ from `rn`, the destination and the operand's source
1084    // registers can be used as scratch registers.
1085    if (!rd.Is(rn)) temps.Include(rd);
1086    if (!operand.IsImmediate() && !operand.GetBaseRegister().Is(rn)) {
1087      temps.Include(operand.GetBaseRegister());
1088    }
1089    if (operand.IsRegisterShiftedRegister() &&
1090        !operand.GetShiftRegister().Is(rn)) {
1091      temps.Include(operand.GetShiftRegister());
1092    }
1093    scratch = temps.Acquire();
1094    {
1095      // TODO: We shouldn't have to specify how much space the next instruction
1096      // needs.
1097      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1098      mvn(cond, scratch, operand);
1099    }
1100    if (type == kOrns) {
1101      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1102      orrs(cond, rd, rn, scratch);
1103      return;
1104    }
1105    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1106    orr(cond, rd, rn, scratch);
1107    return;
1108  }
1109  if (operand.IsImmediate()) {
1110    int32_t imm = operand.GetSignedImmediate();
1111
1112    // If the immediate can be encoded when inverted, turn Orn into Orr.
1113    // Otherwise rely on HandleOutOfBoundsImmediate to materialise the
1114    // immediate into a scratch register.
1115    if (IsUsingT32() && ((type == kOrn) || (type == kOrns)) &&
1116        ImmediateT32::IsImmediateT32(~imm)) {
1117      VIXL_ASSERT((type == kOrn) || (type == kOrns));
1118      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1119      switch (type) {
1120        case kOrn:
1121          orr(cond, rd, rn, ~imm);
1122          return;
1123        case kOrns:
1124          orrs(cond, rd, rn, ~imm);
1125          return;
1126        default:
1127          VIXL_UNREACHABLE();
1128          break;
1129      }
1130    } else {
1131      UseScratchRegisterScope temps(this);
1132      // Allow using the destination as a scratch register if possible.
1133      if (!rd.Is(rn)) temps.Include(rd);
1134      Register scratch = temps.Acquire();
1135      HandleOutOfBoundsImmediate(cond, scratch, imm);
1136      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1137      (this->*instruction)(cond, rd, rn, scratch);
1138      return;
1139    }
1140  }
1141  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1142}
1143
1144
1145void MacroAssembler::Delegate(InstructionType type,
1146                              InstructionCondSizeRROp instruction,
1147                              Condition cond,
1148                              EncodingSize size,
1149                              Register rd,
1150                              Register rn,
1151                              const Operand& operand) {
1152  // adc adcs add adds and_ ands asr asrs bic bics eor eors lsl lsls lsr lsrs
1153  // orr orrs ror rors rsb rsbs sbc sbcs sub subs
1154
1155  VIXL_ASSERT(
1156      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1157      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1158      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1159      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1160      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1161      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1162      (type == kSub) || (type == kSubs));
1163
1164  CONTEXT_SCOPE;
1165  VIXL_ASSERT(size.IsBest());
1166  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1167    InstructionCondRROp shiftop = NULL;
1168    switch (operand.GetShift().GetType()) {
1169      case LSL:
1170        shiftop = &Assembler::lsl;
1171        break;
1172      case LSR:
1173        shiftop = &Assembler::lsr;
1174        break;
1175      case ASR:
1176        shiftop = &Assembler::asr;
1177        break;
1178      case RRX:
1179        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1180        VIXL_UNREACHABLE();
1181        break;
1182      case ROR:
1183        shiftop = &Assembler::ror;
1184        break;
1185      default:
1186        VIXL_UNREACHABLE();
1187    }
1188    if (shiftop != NULL) {
1189      UseScratchRegisterScope temps(this);
1190      Register rm = operand.GetBaseRegister();
1191      Register rs = operand.GetShiftRegister();
1192      // If different from `rn`, we can make use of either `rd`, `rm` or `rs` as
1193      // a scratch register.
1194      if (!rd.Is(rn)) temps.Include(rd);
1195      if (!rm.Is(rn)) temps.Include(rm);
1196      if (!rs.Is(rn)) temps.Include(rs);
1197      Register scratch = temps.Acquire();
1198      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1199      (this->*shiftop)(cond, scratch, rm, rs);
1200      (this->*instruction)(cond, size, rd, rn, scratch);
1201      return;
1202    }
1203  }
1204  if (operand.IsImmediate()) {
1205    int32_t imm = operand.GetSignedImmediate();
1206    if (ImmediateT32::IsImmediateT32(~imm)) {
1207      if (IsUsingT32()) {
1208        switch (type) {
1209          case kOrr:
1210            orn(cond, rd, rn, ~imm);
1211            return;
1212          case kOrrs:
1213            orns(cond, rd, rn, ~imm);
1214            return;
1215          default:
1216            break;
1217        }
1218      }
1219    }
1220    if (imm < 0) {
1221      InstructionCondSizeRROp asmcb = NULL;
1222      // Add and sub are equivalent using an arithmetic negation:
1223      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1224      // Add and sub with carry are equivalent using a bitwise NOT:
1225      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
1226      switch (type) {
1227        case kAdd:
1228          asmcb = &Assembler::sub;
1229          imm = -imm;
1230          break;
1231        case kAdds:
1232          asmcb = &Assembler::subs;
1233          imm = -imm;
1234          break;
1235        case kSub:
1236          asmcb = &Assembler::add;
1237          imm = -imm;
1238          break;
1239        case kSubs:
1240          asmcb = &Assembler::adds;
1241          imm = -imm;
1242          break;
1243        case kAdc:
1244          asmcb = &Assembler::sbc;
1245          imm = ~imm;
1246          break;
1247        case kAdcs:
1248          asmcb = &Assembler::sbcs;
1249          imm = ~imm;
1250          break;
1251        case kSbc:
1252          asmcb = &Assembler::adc;
1253          imm = ~imm;
1254          break;
1255        case kSbcs:
1256          asmcb = &Assembler::adcs;
1257          imm = ~imm;
1258          break;
1259        default:
1260          break;
1261      }
1262      if (asmcb != NULL) {
1263        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1264        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1265        return;
1266      }
1267    }
1268    UseScratchRegisterScope temps(this);
1269    // Allow using the destination as a scratch register if possible.
1270    if (!rd.Is(rn)) temps.Include(rd);
1271    Register scratch = temps.Acquire();
1272    // TODO: The scope length was measured empirically. We should analyse
1273    // the worst-case size and add targeted tests.
1274    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1275    mov(cond, scratch, operand.GetImmediate());
1276    (this->*instruction)(cond, size, rd, rn, scratch);
1277    return;
1278  }
1279  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1280}
1281
1282
1283void MacroAssembler::Delegate(InstructionType type,
1284                              InstructionRL instruction,
1285                              Register rn,
1286                              Label* label) {
1287  // cbz cbnz
1288  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1289
1290  CONTEXT_SCOPE;
1291  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1292  if (IsUsingA32()) {
1293    if (type == kCbz) {
1294      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1295    } else {
1296      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1297    }
1298  } else if (rn.IsLow()) {
1299    switch (type) {
1300      case kCbnz: {
1301        Label done;
1302        cbz(rn, &done);
1303        b(label);
1304        Bind(&done);
1305        return;
1306      }
1307      case kCbz: {
1308        Label done;
1309        cbnz(rn, &done);
1310        b(label);
1311        Bind(&done);
1312        return;
1313      }
1314      default:
1315        break;
1316    }
1317  }
1318  Assembler::Delegate(type, instruction, rn, label);
1319}
1320
1321
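// Returns true if every byte of `imm` is either 0x00 or 0xff (e.g.
// 0xff0000ff), i.e. the value can be expressed as a vmov.i64 byte mask.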
1322template <typename T>
1323static inline bool IsI64BitPattern(T imm) {
1324  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1325    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1326  }
1327  return true;
1328}
1329
1330
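// Returns true if all the bytes of `imm` are identical (e.g. 0xabababab),
// so the value can be materialised with a single vmov.i8.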
1331template <typename T>
1332static inline bool IsI8BitPattern(T imm) {
1333  uint8_t imm8 = imm & 0xff;
1334  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1335    imm >>= 8;
1336    if ((imm & 0xff) != imm8) return false;
1337  }
1338  return true;
1339}
1340
1341
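// Returns true if `imm32` matches one of the patterns enumerated below, in
// which case the move can instead be encoded as a vmvn.i32 of the bitwise
// inverse.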
1342static inline bool CanBeInverted(uint32_t imm32) {
1343  uint32_t fill8 = 0;
1344
1345  if ((imm32 & 0xffffff00) == 0xffffff00) {
1346    //    11111111 11111111 11111111 abcdefgh
1347    return true;
1348  }
1349  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1350    fill8 = imm32 & 0xff;
1351    imm32 >>= 8;
1352    if ((imm32 >> 8) == 0xffff) {
1353      //    11111111 11111111 abcdefgh 00000000
1354      // or 11111111 11111111 abcdefgh 11111111
1355      return true;
1356    }
1357    if ((imm32 & 0xff) == fill8) {
1358      imm32 >>= 8;
1359      if ((imm32 >> 8) == 0xff) {
1360        //    11111111 abcdefgh 00000000 00000000
1361        // or 11111111 abcdefgh 11111111 11111111
1362        return true;
1363      }
1364      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1365        //    abcdefgh 11111111 11111111 11111111
1366        return true;
1367      }
1368    }
1369  }
1370  return false;
1371}
1372
1373
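// Replicates `imm` across the wider type RES, e.g.
// replicate<uint64_t>(uint32_t(0xff0000ff)) == 0xff0000ffff0000ff.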
1374template <typename RES, typename T>
1375static inline RES replicate(T imm) {
1376  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1377              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1378  RES res = imm;
1379  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1380    res = (res << (sizeof(T) * 8)) | imm;
1381  }
1382  return res;
1383}
1384
1385
1386void MacroAssembler::Delegate(InstructionType type,
1387                              InstructionCondDtSSop instruction,
1388                              Condition cond,
1389                              DataType dt,
1390                              SRegister rd,
1391                              const SOperand& operand) {
1392  CONTEXT_SCOPE;
1393  if (type == kVmov) {
1394    if (operand.IsImmediate() && dt.Is(F32)) {
1395      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1396      if (neon_imm.CanConvert<float>()) {
1397        // movw ip, imm16
1398        // movt ip, imm16
1399        // vmov s0, ip
1400        UseScratchRegisterScope temps(this);
1401        Register scratch = temps.Acquire();
1402        float f = neon_imm.GetImmediate<float>();
1403        // TODO: The scope length was measured empirically. We should
1404        // analyse the worst-case size and add targeted tests.
1406        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1407        mov(cond, scratch, FloatToRawbits(f));
1408        vmov(cond, rd, scratch);
1409        return;
1410      }
1411    }
1412  }
1413  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1414}
1415
1416
1417void MacroAssembler::Delegate(InstructionType type,
1418                              InstructionCondDtDDop instruction,
1419                              Condition cond,
1420                              DataType dt,
1421                              DRegister rd,
1422                              const DOperand& operand) {
1423  CONTEXT_SCOPE;
1424  if (type == kVmov) {
1425    if (operand.IsImmediate()) {
1426      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1427      switch (dt.GetValue()) {
1428        case I32:
1429          if (neon_imm.CanConvert<uint32_t>()) {
1430            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1431            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1432            if (IsI8BitPattern(imm)) {
1433              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1434              vmov(cond, I8, rd, imm & 0xff);
1435              return;
1436            }
1437            // vmov.i32 d0, 0xff0000ff will translate into
1438            // vmov.i64 d0, 0xff0000ffff0000ff
1439            if (IsI64BitPattern(imm)) {
1440              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1441              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1442              return;
1443            }
1444            // vmov.i32 d0, 0xffab0000 will translate into
1445            // vmvn.i32 d0, 0x0054ffff
1446            if (cond.Is(al) && CanBeInverted(imm)) {
1447              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1448              vmvn(I32, rd, ~imm);
1449              return;
1450            }
1451          }
1452          break;
1453        case I16:
1454          if (neon_imm.CanConvert<uint16_t>()) {
1455            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1456            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1457            if (IsI8BitPattern(imm)) {
1458              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1459              vmov(cond, I8, rd, imm & 0xff);
1460              return;
1461            }
1462          }
1463          break;
1464        case I64:
1465          if (neon_imm.CanConvert<uint64_t>()) {
1466            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1467            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1468            if (IsI8BitPattern(imm)) {
1469              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1470              vmov(cond, I8, rd, imm & 0xff);
1471              return;
1472            }
1473            // mov ip, lo(imm64)
1474            // vdup d0, ip
1475            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to
1476            // be preserved.
1477            {
1478              UseScratchRegisterScope temps(this);
1479              Register scratch = temps.Acquire();
1480              {
1481                // TODO: The scope length was measured empirically. We should
1482                // analyse the worst-case size and add targeted tests.
1484                CodeBufferCheckScope scope(this,
1485                                           2 * kMaxInstructionSizeInBytes);
1486                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1487              }
1488              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1489              vdup(cond, Untyped32, rd, scratch);
1490            }
1491            // mov ip, hi(imm64)
1492            // vmov d0[1], ip
1493            {
1494              UseScratchRegisterScope temps(this);
1495              Register scratch = temps.Acquire();
1496              {
1497                // TODO: The scope length was measured empirically. We should
1498                // analyse the worst-case size and add targeted tests.
1500                CodeBufferCheckScope scope(this,
1501                                           2 * kMaxInstructionSizeInBytes);
1502                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1503              }
1504              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1505              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1506            }
1507            return;
1508          }
1509          break;
1510        default:
1511          break;
1512      }
1513      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1514      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1515        // mov ip, imm32
1516        // vdup.16 d0, ip
1517        UseScratchRegisterScope temps(this);
1518        Register scratch = temps.Acquire();
1519        {
1520          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1521          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1522        }
1523        DataTypeValue vdup_dt = Untyped32;
1524        switch (dt.GetValue()) {
1525          case I16:
1526            vdup_dt = Untyped16;
1527            break;
1528          case I32:
1529            vdup_dt = Untyped32;
1530            break;
1531          default:
1532            VIXL_UNREACHABLE();
1533        }
1534        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1535        vdup(cond, vdup_dt, rd, scratch);
1536        return;
1537      }
1538      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1539        float f = neon_imm.GetImmediate<float>();
1540        // Punt to vmov.i32
1541        // TODO: The scope length was guessed based on the double case below.
1542        // We should analyse the worst-case size and add targeted tests.
1543        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1544        vmov(cond, I32, rd, FloatToRawbits(f));
1545        return;
1546      }
1547      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1548        // Punt to vmov.i64
1549        double d = neon_imm.GetImmediate<double>();
1550        // TODO: The scope length was measured empirically.
1551        // We should analyse the worst-case size and add targeted
1552        // tests.
1553        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1554        vmov(cond, I64, rd, DoubleToRawbits(d));
1555        return;
1556      }
1557    }
1558  }
1559  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1560}
1561
1562
1563void MacroAssembler::Delegate(InstructionType type,
1564                              InstructionCondDtQQop instruction,
1565                              Condition cond,
1566                              DataType dt,
1567                              QRegister rd,
1568                              const QOperand& operand) {
1569  CONTEXT_SCOPE;
1570  if (type == kVmov) {
1571    if (operand.IsImmediate()) {
1572      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1573      switch (dt.GetValue()) {
1574        case I32:
1575          if (neon_imm.CanConvert<uint32_t>()) {
1576            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1577            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1578            if (IsI8BitPattern(imm)) {
1579              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1580              vmov(cond, I8, rd, imm & 0xff);
1581              return;
1582            }
1583            // vmov.i32 d0, 0xff0000ff will translate into
1584            // vmov.i64 d0, 0xff0000ffff0000ff
1585            if (IsI64BitPattern(imm)) {
1586              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1587              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1588              return;
1589            }
1590            // vmov.i32 d0, 0xffab0000 will translate into
1591            // vmvn.i32 d0, 0x0054ffff
1592            if (CanBeInverted(imm)) {
1593              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1594              vmvn(cond, I32, rd, ~imm);
1595              return;
1596            }
1597          }
1598          break;
1599        case I16:
1600          if (neon_imm.CanConvert<uint16_t>()) {
1601            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1602            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1603            if (IsI8BitPattern(imm)) {
1604              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1605              vmov(cond, I8, rd, imm & 0xff);
1606              return;
1607            }
1608          }
1609          break;
1610        case I64:
1611          if (neon_imm.CanConvert<uint64_t>()) {
1612            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1613            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1614            if (IsI8BitPattern(imm)) {
1615              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1616              vmov(cond, I8, rd, imm & 0xff);
1617              return;
1618            }
1619            // mov ip, lo(imm64)
1620            // vdup q0, ip
1621            // vdup is preferred to 'vmov d0[0]' as d0[1-3] don't need to be
1622            // preserved
1623            {
1624              UseScratchRegisterScope temps(this);
1625              Register scratch = temps.Acquire();
1626              {
1627                CodeBufferCheckScope scope(this,
1628                                           2 * kMaxInstructionSizeInBytes);
1629                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1630              }
1631              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1632              vdup(cond, Untyped32, rd, scratch);
1633            }
1634            // mov ip, hi(imm64)
1635            // vmov.i32 d0[1], ip
1636            // vmov d1, d0
1637            {
1638              UseScratchRegisterScope temps(this);
1639              Register scratch = temps.Acquire();
1640              {
1641                CodeBufferCheckScope scope(this,
1642                                           2 * kMaxInstructionSizeInBytes);
1643                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1644              }
1645              {
1646                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1647                vmov(cond,
1648                     Untyped32,
1649                     DRegisterLane(rd.GetLowDRegister(), 1),
1650                     scratch);
1651              }
1652              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1653              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1654            }
1655            return;
1656          }
1657          break;
1658        default:
1659          break;
1660      }
1661      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1662      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1663        // mov ip, imm32
1664        // vdup.<size> q0, ip
1665        UseScratchRegisterScope temps(this);
1666        Register scratch = temps.Acquire();
1667        {
1668          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1669          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1670        }
1671        DataTypeValue vdup_dt = Untyped32;
1672        switch (dt.GetValue()) {
1673          case I16:
1674            vdup_dt = Untyped16;
1675            break;
1676          case I32:
1677            vdup_dt = Untyped32;
1678            break;
1679          default:
1680            VIXL_UNREACHABLE();
1681        }
1682        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1683        vdup(cond, vdup_dt, rd, scratch);
1684        return;
1685      }
1686      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1687        // Punt to vmov.i32
1688        float f = neon_imm.GetImmediate<float>();
1689        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1690        vmov(cond, I32, rd, FloatToRawbits(f));
1691        return;
1692      }
1693      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1694        // Use vmov to create the double in the low D register, then duplicate
1695        // it into the high D register.
1696        double d = neon_imm.GetImmediate<double>();
1697        CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
1698        vmov(cond, F64, rd.GetLowDRegister(), d);
1699        vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1700        return;
1701      }
1702    }
1703  }
1704  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1705}
1706
1707
1708void MacroAssembler::Delegate(InstructionType type,
1709                              InstructionCondSizeRMop instruction,
1710                              Condition cond,
1711                              EncodingSize size,
1712                              Register rd,
1713                              const MemOperand& operand) {
1714  // ldr ldrb ldrh ldrsb ldrsh str strb strh
1715  CONTEXT_SCOPE;
1716  VIXL_ASSERT(size.IsBest());
1717  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
1718              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
1719              (type == kStrb) || (type == kStrh));
1720  if (operand.IsImmediate()) {
1721    const Register& rn = operand.GetBaseRegister();
1722    AddrMode addrmode = operand.GetAddrMode();
1723    int32_t offset = operand.GetOffsetImmediate();
1724    uint32_t mask = 0;
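    // 'mask' is the widest offset immediate the selected load/store encoding
    // can hold directly: 0xfff where the 12-bit immediate form is available,
    // 0xff otherwise.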
1725    switch (type) {
1726      case kLdr:
1727      case kLdrb:
1728      case kStr:
1729      case kStrb:
1730        if (IsUsingA32() || (addrmode == Offset)) {
1731          mask = 0xfff;
1732        } else {
1733          mask = 0xff;
1734        }
1735        break;
1736      case kLdrsb:
1737      case kLdrh:
1738      case kLdrsh:
1739      case kStrh:
1740        if (IsUsingT32() && (addrmode == Offset)) {
1741          mask = 0xfff;
1742        } else {
1743          mask = 0xff;
1744        }
1745        break;
1746      default:
1747        VIXL_UNREACHABLE();
1748        return;
1749    }
1750    bool negative;
1751    // Try to maximize the offset used by the MemOperand (load_store_offset).
1752    // Add or subtract the part which can't be used by the MemOperand
1753    // (add_sub_offset).
1754    int32_t add_sub_offset;
1755    int32_t load_store_offset;
1756    load_store_offset = offset & mask;
1757    if (offset >= 0) {
1758      negative = false;
1759      add_sub_offset = offset & ~mask;
1760    } else {
1761      negative = true;
1762      add_sub_offset = -offset & ~mask;
1763      if (load_store_offset > 0) add_sub_offset += mask + 1;
1764    }
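    // For example, with mask == 0xfff and offset == 0x12345 this gives
    // load_store_offset == 0x345 and add_sub_offset == 0x12000; with
    // offset == -0x12345 it gives load_store_offset == 0xcbb and
    // add_sub_offset == 0x13000, so that -0x13000 + 0xcbb == -0x12345.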
1765    switch (addrmode) {
1766      case PreIndex:
1767        // Avoid the unpredictable case 'str r0, [r0, imm]!'
1768        if (!rn.Is(rd)) {
1769          // Pre-Indexed case:
1770          // ldr r0, [r1, 12345]! will translate into
1771          //   add r1, r1, 12345
1772          //   ldr r0, [r1]
1773          {
1774            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1775            if (negative) {
1776              sub(cond, rn, rn, add_sub_offset);
1777            } else {
1778              add(cond, rn, rn, add_sub_offset);
1779            }
1780          }
1781          {
1782            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1783            (this->*instruction)(cond,
1784                                 size,
1785                                 rd,
1786                                 MemOperand(rn, load_store_offset, PreIndex));
1787          }
1788          return;
1789        }
1790        break;
1791      case Offset: {
1792        UseScratchRegisterScope temps(this);
1793        // Allow using the destination as a scratch register if possible.
1794        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1795            !rd.Is(rn)) {
1796          temps.Include(rd);
1797        }
1798        Register scratch = temps.Acquire();
1799        // Offset case:
1800        // ldr r0, [r1, 12345] will translate into
1801        //   add r0, r1, 12345
1802        //   ldr r0, [r0]
1803        {
1804          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1805          if (negative) {
1806            sub(cond, scratch, rn, add_sub_offset);
1807          } else {
1808            add(cond, scratch, rn, add_sub_offset);
1809          }
1810        }
1811        {
1812          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1813          (this->*instruction)(cond,
1814                               size,
1815                               rd,
1816                               MemOperand(scratch, load_store_offset));
1817        }
1818        return;
1819      }
1820      case PostIndex:
1821        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1822        if (!rn.Is(rd)) {
1823          // Post-indexed case:
1824          // ldr r0, [r1], imm32 will translate into
1825          //   ldr r0, [r1]
1826          //   movw ip, imm32 & 0xffff
1827          //   movt ip, imm32 >> 16
1828          //   add r1, r1, ip
1829          {
1830            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1831            (this->*instruction)(cond,
1832                                 size,
1833                                 rd,
1834                                 MemOperand(rn, load_store_offset, PostIndex));
1835          }
1836          {
1837            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1838            if (negative) {
1839              sub(cond, rn, rn, add_sub_offset);
1840            } else {
1841              add(cond, rn, rn, add_sub_offset);
1842            }
1843          }
1844          return;
1845        }
1846        break;
1847    }
1848  } else if (operand.IsPlainRegister()) {
1849    const Register& rn = operand.GetBaseRegister();
1850    AddrMode addrmode = operand.GetAddrMode();
1851    const Register& rm = operand.GetOffsetRegister();
1852    if (rm.IsPC()) {
1853      VIXL_ABORT_WITH_MSG(
1854          "The MacroAssembler does not convert loads and stores with a PC "
1855          "offset register.\n");
1856    }
1857    if (rn.IsPC() && addrmode != Offset) {
1858      VIXL_ABORT_WITH_MSG(
1859          "The MacroAssembler does not convert loads and stores with a PC "
1860          "base register in pre-index or post-index mode.\n");
1861    }
1862    switch (addrmode) {
1863      case PreIndex:
1864        // Avoid the unpredictable case 'str r0, [r0, r1]!'
1865        if (!rn.Is(rd)) {
1866          // Pre-Indexed case:
1867          // ldr r0, [r1, r2]! will translate into
1868          //   add r1, r1, r2
1869          //   ldr r0, [r1]
1870          {
1871            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1872            if (operand.GetSign().IsPlus()) {
1873              add(cond, rn, rn, rm);
1874            } else {
1875              sub(cond, rn, rn, rm);
1876            }
1877          }
1878          {
1879            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1880            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1881          }
1882          return;
1883        }
1884        break;
1885      case Offset: {
1886        UseScratchRegisterScope temps(this);
1887        // Allow using the destination as a scratch register if this is not a
1888        // store.
1889        // Avoid using PC as a temporary as this has side-effects.
1890        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1891            !rd.IsPC()) {
1892          temps.Include(rd);
1893        }
1894        Register scratch = temps.Acquire();
1895        // Offset case:
1896        // ldr r0, [r1, r2] will translate into
1897        //   add r0, r1, r2
1898        //   ldr r0, [r0]
1899        {
1900          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1901          if (operand.GetSign().IsPlus()) {
1902            add(cond, scratch, rn, rm);
1903          } else {
1904            sub(cond, scratch, rn, rm);
1905          }
1906        }
1907        {
1908          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1909          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1910        }
1911        return;
1912      }
1913      case PostIndex:
1914        // Avoid the unpredictable case 'ldr r0, [r0], r1'
1915        if (!rn.Is(rd)) {
1916          // Post-indexed case:
1917          // ldr r0, [r1], r2 will translate into
1918          //   ldr r0, [r1]
1919          //   add r1, r1, r2
1920          {
1921            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1922            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1923          }
1924          {
1925            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1926            if (operand.GetSign().IsPlus()) {
1927              add(cond, rn, rn, rm);
1928            } else {
1929              sub(cond, rn, rn, rm);
1930            }
1931          }
1932          return;
1933        }
1934        break;
1935    }
1936  }
1937  Assembler::Delegate(type, instruction, cond, size, rd, operand);
1938}
1939
1940
1941void MacroAssembler::Delegate(InstructionType type,
1942                              InstructionCondRRMop instruction,
1943                              Condition cond,
1944                              Register rt,
1945                              Register rt2,
1946                              const MemOperand& operand) {
1947  // ldaexd, ldrd, ldrexd, stlex, stlexb, stlexh, strd, strex, strexb, strexh
1948
1949  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
1950      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
1951      (type == kStrexb) || (type == kStrexh)) {
1952    UnimplementedDelegate(type);
1953    return;
1954  }
1955
1956  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
1957
1958  CONTEXT_SCOPE;
1959
1960  // TODO: Should we allow these cases?
1961  if (IsUsingA32()) {
1962    // The first register needs to be even.
1963    if ((rt.GetCode() & 1) != 0) {
1964      UnimplementedDelegate(type);
1965      return;
1966    }
1967    // Registers need to be adjacent.
1968    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
1969      UnimplementedDelegate(type);
1970      return;
1971    }
1972    // LDRD lr, pc, [...] is not allowed (rt2 would be pc).
1973    if (rt.Is(lr)) {
1974      UnimplementedDelegate(type);
1975      return;
1976    }
1977  }
1978
1979  if (operand.IsImmediate()) {
1980    const Register& rn = operand.GetBaseRegister();
1981    AddrMode addrmode = operand.GetAddrMode();
1982    int32_t offset = operand.GetOffsetImmediate();
1983    switch (addrmode) {
1984      case PreIndex: {
1985        // Allow using the destinations as scratch registers if possible.
1986        UseScratchRegisterScope temps(this);
1987        if (type == kLdrd) {
1988          if (!rt.Is(rn)) temps.Include(rt);
1989          if (!rt2.Is(rn)) temps.Include(rt2);
1990        }
1991
1992        // Pre-Indexed case:
1993        // ldrd r0, r1, [r2, 12345]! will translate into
1994        //   add r2, 12345
1995        //   ldrd r0, r1, [r2]
1996        {
1997          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1998          add(cond, rn, rn, offset);
1999        }
2000        {
2001          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2002          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2003        }
2004        return;
2005      }
2006      case Offset: {
2007        UseScratchRegisterScope temps(this);
2008        // Allow using the destinations as scratch registers if possible.
2009        if (type == kLdrd) {
2010          if (!rt.Is(rn)) temps.Include(rt);
2011          if (!rt2.Is(rn)) temps.Include(rt2);
2012        }
2013        Register scratch = temps.Acquire();
2014        // Offset case:
2015        // ldrd r0, r1, [r2, 12345] will translate into
2016        //   add r0, r2, 12345
2017        //   ldrd r0, r1, [r0]
2018        {
2019          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2020          add(cond, scratch, rn, offset);
2021        }
2022        {
2023          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2024          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2025        }
2026        return;
2027      }
2028      case PostIndex:
2029        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2030        if (!rn.Is(rt) && !rn.Is(rt2)) {
2031          // Post-indexed case:
2032          // ldrd r0, r1, [r2], imm32 will translate into
2033          //   ldrd r0, r1, [r2]
2034          //   movw ip, imm32 & 0xffff
2035          //   movt ip, imm32 >> 16
2036          //   add r2, ip
2037          {
2038            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2039            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2040          }
2041          {
2042            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2043            add(cond, rn, rn, offset);
2044          }
2045          return;
2046        }
2047        break;
2048    }
2049  }
2050  if (operand.IsPlainRegister()) {
2051    const Register& rn = operand.GetBaseRegister();
2052    const Register& rm = operand.GetOffsetRegister();
2053    AddrMode addrmode = operand.GetAddrMode();
2054    switch (addrmode) {
2055      case PreIndex:
2056        // ldrd r0, r1, [r2, r3]! will translate into
2057        //   add r2, r3
2058        //   ldrd r0, r1, [r2]
2059        {
2060          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2061          if (operand.GetSign().IsPlus()) {
2062            add(cond, rn, rn, rm);
2063          } else {
2064            sub(cond, rn, rn, rm);
2065          }
2066        }
2067        {
2068          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2069          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2070        }
2071        return;
2072      case PostIndex:
2073        // ldrd r0, r1, [r2], r3 will translate into
2074        //   ldrd r0, r1, [r2]
2075        //   add r2, r3
2076        {
2077          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2078          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2079        }
2080        {
2081          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2082          if (operand.GetSign().IsPlus()) {
2083            add(cond, rn, rn, rm);
2084          } else {
2085            sub(cond, rn, rn, rm);
2086          }
2087        }
2088        return;
2089      case Offset: {
2090        UseScratchRegisterScope temps(this);
2091        // Allow using the destinations as scratch registers if possible.
2092        if (type == kLdrd) {
2093          if (!rt.Is(rn)) temps.Include(rt);
2094          if (!rt2.Is(rn)) temps.Include(rt2);
2095        }
2096        Register scratch = temps.Acquire();
2097        // Offset case:
2098        // ldrd r0, r1, [r2, r3] will translate into
2099        //   add r0, r2, r3
2100        //   ldrd r0, r1, [r0]
2101        {
2102          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2103          if (operand.GetSign().IsPlus()) {
2104            add(cond, scratch, rn, rm);
2105          } else {
2106            sub(cond, scratch, rn, rm);
2107          }
2108        }
2109        {
2110          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2111          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2112        }
2113        return;
2114      }
2115    }
2116  }
2117  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2118}
2119
2120
2121void MacroAssembler::Delegate(InstructionType type,
2122                              InstructionCondDtSMop instruction,
2123                              Condition cond,
2124                              DataType dt,
2125                              SRegister rd,
2126                              const MemOperand& operand) {
2127  // vldr.32 vstr.32
2128  CONTEXT_SCOPE;
2129  if (operand.IsImmediate()) {
2130    const Register& rn = operand.GetBaseRegister();
2131    AddrMode addrmode = operand.GetAddrMode();
2132    int32_t offset = operand.GetOffsetImmediate();
2133    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2134                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2135    if (rn.IsPC()) {
2136      VIXL_ABORT_WITH_MSG(
2137          "The MacroAssembler does not convert vldr or vstr with a PC base "
2138          "register.\n");
2139    }
2140    switch (addrmode) {
2141      case PreIndex:
2142        // Pre-Indexed case:
2143        // vldr.32 s0, [r1, 12345]! will translate into
2144        //   add r1, 12345
2145        //   vldr.32 s0, [r1]
2146        if (offset != 0) {
2147          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2148          add(cond, rn, rn, offset);
2149        }
2150        {
2151          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2152          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2153        }
2154        return;
2155      case Offset: {
2156        UseScratchRegisterScope temps(this);
2157        Register scratch = temps.Acquire();
2158        // Offset case:
2159        // vldr.32 s0, [r1, 12345] will translate into
2160        //   add ip, r1, 12345
2161        //   vldr.32 s0, [ip]
2162        {
2163          VIXL_ASSERT(offset != 0);
2164          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2165          add(cond, scratch, rn, offset);
2166        }
2167        {
2168          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2169          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2170        }
2171        return;
2172      }
2173      case PostIndex:
2174        // Post-indexed case:
2175        // vldr.32 s0, [r1], imm32 will translate into
2176        //   vldr.32 s0, [r1]
2177        //   movw ip, imm32 & 0xffff
2178        //   movt ip, imm32 >> 16
2179        //   add r1, ip
2180        {
2181          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2182          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2183        }
2184        if (offset != 0) {
2185          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2186          add(cond, rn, rn, offset);
2187        }
2188        return;
2189    }
2190  }
2191  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2192}
2193
2194
2195void MacroAssembler::Delegate(InstructionType type,
2196                              InstructionCondDtDMop instruction,
2197                              Condition cond,
2198                              DataType dt,
2199                              DRegister rd,
2200                              const MemOperand& operand) {
2201  // vldr.64 vstr.64
2202  CONTEXT_SCOPE;
2203  if (operand.IsImmediate()) {
2204    const Register& rn = operand.GetBaseRegister();
2205    AddrMode addrmode = operand.GetAddrMode();
2206    int32_t offset = operand.GetOffsetImmediate();
2207    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2208                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2209    if (rn.IsPC()) {
2210      VIXL_ABORT_WITH_MSG(
2211          "The MacroAssembler does not convert vldr or vstr with a PC base "
2212          "register.\n");
2213    }
2214    switch (addrmode) {
2215      case PreIndex:
2216        // Pre-Indexed case:
2217        // vldr.64 d0, [r1, 12345]! will translate into
2218        //   add r1, 12345
2219        //   vldr.64 d0, [r1]
2220        if (offset != 0) {
2221          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2222          add(cond, rn, rn, offset);
2223        }
2224        {
2225          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2226          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2227        }
2228        return;
2229      case Offset: {
2230        UseScratchRegisterScope temps(this);
2231        Register scratch = temps.Acquire();
2232        // Offset case:
2233        // vldr.64 d0, [r1, 12345] will translate into
2234        //   add ip, r1, 12345
2235        //   vldr.64 d0, [ip]
2236        {
2237          VIXL_ASSERT(offset != 0);
2238          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2239          add(cond, scratch, rn, offset);
2240        }
2241        {
2242          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2243          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2244        }
2245        return;
2246      }
2247      case PostIndex:
2248        // Post-indexed case:
2249        // vldr.64 d0, [r1], imm32 will translate into
2250        //   vldr.64 d0, [r1]
2251        //   movw ip, imm32 & 0xffff
2252        //   movt ip, imm32 >> 16
2253        //   add r1, ip
2254        {
2255          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2256          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2257        }
2258        if (offset != 0) {
2259          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2260          add(cond, rn, rn, offset);
2261        }
2262        return;
2263    }
2264  }
2265  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2266}
2267
2268
2269void MacroAssembler::Delegate(InstructionType type,
2270                              InstructionCondMsrOp instruction,
2271                              Condition cond,
2272                              MaskedSpecialRegister spec_reg,
2273                              const Operand& operand) {
2274  USE(type);
2275  VIXL_ASSERT(type == kMsr);
2276  if (operand.IsImmediate()) {
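    // Immediate case:
    // msr spec_reg, imm will translate into
    //   mov ip, imm
    //   msr spec_reg, ip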
2277    UseScratchRegisterScope temps(this);
2278    Register scratch = temps.Acquire();
2279    {
2280      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2281      mov(cond, scratch, operand);
2282    }
2283    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2284    msr(cond, spec_reg, scratch);
2285    return;
2286  }
2287  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2288}
2289
2290#undef CONTEXT_SCOPE
2291#undef TOSTRING
2292#undef STRINGIFY
2293
2294// Start of generated code.
2295// End of generated code.
2296}  // namespace aarch32
2297}  // namespace vixl
2298