macro-assembler-aarch32.cc revision fd7f94dd378ec93adcc201d8eb9d71836948de09
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT((available_ == NULL) && (available_vfp_ == NULL));
41  available_ = masm->GetScratchRegisterList();
42  old_available_ = available_->GetList();
43  available_vfp_ = masm->GetScratchVRegisterList();
44  old_available_vfp_ = available_vfp_->GetList();
45}
46
47
48void UseScratchRegisterScope::Close() {
49  if (available_ != NULL) {
50    available_->SetList(old_available_);
51    available_ = NULL;
52  }
53  if (available_vfp_ != NULL) {
54    available_vfp_->SetList(old_available_vfp_);
55    available_vfp_ = NULL;
56  }
57}
58
59
60bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
61  VIXL_ASSERT(available_ != NULL);
62  VIXL_ASSERT(reg.IsValid());
63  return available_->Includes(reg);
64}
65
66
67bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
68  VIXL_ASSERT(available_vfp_ != NULL);
69  VIXL_ASSERT(reg.IsValid());
70  return available_vfp_->IncludesAllOf(reg);
71}
72
73
74Register UseScratchRegisterScope::Acquire() {
75  VIXL_ASSERT(available_ != NULL);
76  VIXL_CHECK(!available_->IsEmpty());
77  Register reg = available_->GetFirstAvailableRegister();
78  available_->Remove(reg);
79  return reg;
80}
81
82
83VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
84  switch (size_in_bits) {
85    case kSRegSizeInBits:
86      return AcquireS();
87    case kDRegSizeInBits:
88      return AcquireD();
89    case kQRegSizeInBits:
90      return AcquireQ();
91    default:
92      VIXL_UNREACHABLE();
93      return NoVReg;
94  }
95}
96
97
98QRegister UseScratchRegisterScope::AcquireQ() {
99  VIXL_ASSERT(available_vfp_ != NULL);
100  VIXL_CHECK(!available_vfp_->IsEmpty());
101  QRegister reg = available_vfp_->GetFirstAvailableQRegister();
102  available_vfp_->Remove(reg);
103  return reg;
104}
105
106
107DRegister UseScratchRegisterScope::AcquireD() {
108  VIXL_ASSERT(available_vfp_ != NULL);
109  VIXL_CHECK(!available_vfp_->IsEmpty());
110  DRegister reg = available_vfp_->GetFirstAvailableDRegister();
111  available_vfp_->Remove(reg);
112  return reg;
113}
114
115
116SRegister UseScratchRegisterScope::AcquireS() {
117  VIXL_ASSERT(available_vfp_ != NULL);
118  VIXL_CHECK(!available_vfp_->IsEmpty());
119  SRegister reg = available_vfp_->GetFirstAvailableSRegister();
120  available_vfp_->Remove(reg);
121  return reg;
122}
123
124
125void UseScratchRegisterScope::Release(const Register& reg) {
126  VIXL_ASSERT(available_ != NULL);
127  VIXL_ASSERT(reg.IsValid());
128  VIXL_ASSERT(!available_->Includes(reg));
129  available_->Combine(reg);
130}
131
132
133void UseScratchRegisterScope::Release(const VRegister& reg) {
134  VIXL_ASSERT(available_vfp_ != NULL);
135  VIXL_ASSERT(reg.IsValid());
136  VIXL_ASSERT(!available_vfp_->IncludesAliasOf(reg));
137  available_vfp_->Combine(reg);
138}
139
140
141void UseScratchRegisterScope::Include(const RegisterList& list) {
142  VIXL_ASSERT(available_ != NULL);
143  RegisterList excluded_registers(sp, lr, pc);
144  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
145  available_->SetList(available_->GetList() | mask);
146}
147
148
149void UseScratchRegisterScope::Include(const VRegisterList& list) {
150  VIXL_ASSERT(available_vfp_ != NULL);
151  available_vfp_->SetList(available_vfp_->GetList() | list.GetList());
152}
153
154
155void UseScratchRegisterScope::Exclude(const RegisterList& list) {
156  VIXL_ASSERT(available_ != NULL);
157  available_->SetList(available_->GetList() & ~list.GetList());
158}
159
160
161void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
162  VIXL_ASSERT(available_vfp_ != NULL);
163  available_vfp_->SetList(available_vfp_->GetList() & ~list.GetList());
164}
165
166
167void UseScratchRegisterScope::Exclude(const Operand& operand) {
168  if (operand.IsImmediateShiftedRegister()) {
169    Exclude(operand.GetBaseRegister());
170  } else if (operand.IsRegisterShiftedRegister()) {
171    Exclude(operand.GetBaseRegister(), operand.GetShiftRegister());
172  } else {
173    VIXL_ASSERT(operand.IsImmediate());
174  }
175}
176
177
178void UseScratchRegisterScope::ExcludeAll() {
179  if (available_ != NULL) {
180    available_->SetList(0);
181  }
182  if (available_vfp_ != NULL) {
183    available_vfp_->SetList(0);
184  }
185}
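
// A minimal usage sketch of UseScratchRegisterScope (hypothetical caller code,
// not part of this file; `masm` is assumed to be a MacroAssembler):
//
//   {
//     UseScratchRegisterScope temps(&masm);
//     Register tmp = temps.Acquire();     // Claim a core scratch register.
//     DRegister dtmp = temps.AcquireD();  // Claim a VFP scratch register.
//     // ... use tmp and dtmp ...
//   }  // Closing the scope restores the original scratch register lists.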
186
187
188void VeneerPoolManager::AddLabel(Label* label) {
189  if (!label->IsInVeneerPool()) {
190    label->SetVeneerPoolManager(this);
191    labels_.push_back(label);
192  }
193  Label::ForwardReference& back = label->GetBackForwardRef();
194  back.SetIsBranch();
195  label->UpdateCheckpoint();
196  Label::Offset tmp = label->GetCheckpoint();
197  if (checkpoint_ > tmp) {
198    checkpoint_ = tmp;
199    masm_->ComputeCheckpoint();
200  }
201}
202
203
204void VeneerPoolManager::RemoveLabel(Label* label) {
205  label->ClearVeneerPoolManager();
206  if (label->GetCheckpoint() == checkpoint_) {
207    // We have to compute checkpoint again.
208    checkpoint_ = Label::kMaxOffset;
209    for (std::list<Label*>::iterator it = labels_.begin();
210         it != labels_.end();) {
211      if (*it == label) {
212        it = labels_.erase(it);
213      } else {
214        checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
215        ++it;
216      }
217    }
218    masm_->ComputeCheckpoint();
219  } else {
220    // We only have to remove the label from the list.
221    for (std::list<Label*>::iterator it = labels_.begin();; ++it) {
222      VIXL_ASSERT(it != labels_.end());
223      if (*it == label) {
224        labels_.erase(it);
225        break;
226      }
227    }
228  }
229}
230
231
232void VeneerPoolManager::Emit(Label::Offset target) {
233  VIXL_ASSERT(!IsBlocked());
234  checkpoint_ = Label::kMaxOffset;
235  // Sort the labels by checkpoint so that no veneer ends up out of
236  // range.
237  labels_.sort(Label::CompareLabels);
238  // To avoid too many veneers, generate veneers which will be necessary soon.
239  static const size_t kVeneerEmissionMargin = 1 * KBytes;
240  // To avoid too many veneers, reuse the generated veneers for other uses
241  // which are not too far away.
242  static const size_t kVeneerEmittedMargin = 2 * KBytes;
243  Label::Offset emitted_target = target + kVeneerEmittedMargin;
244  target += kVeneerEmissionMargin;
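  // For example (illustrative numbers), if target was 0x1000 on entry: labels
  // whose checkpoint is no greater than 0x1400 get a veneer generated now, and
  // any branch to such a label whose own checkpoint is no greater than 0x1800
  // is redirected to that veneer.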
245  // Reset the checkpoint. It will be computed again in the loop.
246  checkpoint_ = Label::kMaxOffset;
247  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();) {
248    // The labels are sorted. As soon as a veneer is not needed, we can stop.
249    if ((*it)->GetCheckpoint() > target) {
250      checkpoint_ = std::min(checkpoint_, (*it)->GetCheckpoint());
251      break;
252    }
253    // Define the veneer.
254    Label veneer;
255    masm_->Bind(&veneer);
256    Label::Offset label_checkpoint = Label::kMaxOffset;
257    // Check all uses of this label.
258    for (Label::ForwardRefList::iterator ref = (*it)->GetFirstForwardRef();
259         ref != (*it)->GetEndForwardRef();) {
260      if (ref->IsBranch()) {
261        if (ref->GetCheckpoint() <= emitted_target) {
262          // Use the veneer.
263          masm_->EncodeLabelFor(*ref, &veneer);
264          ref = (*it)->Erase(ref);
265        } else {
266          // Don't use the veneer => update checkpoint.
267          label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
268          ++ref;
269        }
270      } else {
271        ++ref;
272      }
273    }
274    // Even if this label has no remaining uses, we can keep it in the list:
275    // the next "B" to it would add it back anyway.
276    (*it)->SetCheckpoint(label_checkpoint);
277    checkpoint_ = std::min(checkpoint_, label_checkpoint);
278    // Generate the veneer.
279    masm_->B(*it);
280    ++it;
281  }
282#ifdef VIXL_DEBUG
283  for (std::list<Label*>::iterator it = labels_.begin(); it != labels_.end();
284       ++it) {
285    VIXL_ASSERT((*it)->GetCheckpoint() >= checkpoint_);
286  }
287#endif
288  masm_->ComputeCheckpoint();
289}
290
291
292// We use a subclass to access the protected `ExactAssemblyScope` constructor,
293// which gives us control over the pools, and we make the subclass's
294// constructor private to limit its use to code paths that emit pools.
295class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
296 private:
297  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm,
298                                      size_t size,
299                                      SizePolicy size_policy = kExactSize)
300      : ExactAssemblyScope(masm,
301                           size,
302                           size_policy,
303                           ExactAssemblyScope::kIgnorePools) {}
304
305  friend void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
306                                              EmitOption option);
307
308  // TODO: `PerformEnsureEmit` is `private`, so we have to make the
309  // `MacroAssembler` a friend.
310  friend class MacroAssembler;
311};
312
313
314void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
315  EmitOption option = kBranchRequired;
316  Label after_pools;
317  if (target > veneer_pool_manager_.GetCheckpoint()) {
318    {
319      ExactAssemblyScopeWithoutPoolsCheck
320          guard(this,
321                kMaxInstructionSizeInBytes,
322                ExactAssemblyScope::kMaximumSize);
323      b(&after_pools);
324    }
325    veneer_pool_manager_.Emit(target);
326    option = kNoBranchRequired;
327  }
328  // Check if the macro-assembler's internal literal pool should be emitted
329  // to avoid any overflow. If we already generated the veneers, we can
330  // emit the pool (the branch is already done).
331  VIXL_ASSERT(GetCursorOffset() <= literal_pool_manager_.GetCheckpoint());
332  if ((target > literal_pool_manager_.GetCheckpoint()) ||
333      (option == kNoBranchRequired)) {
334    // We will generate the literal pool. Generate all the veneers which
335    // would become out of range.
336    size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize();
337    VIXL_ASSERT(IsInt32(literal_pool_size));
338    Label::Offset veneers_target =
339        target + static_cast<Label::Offset>(literal_pool_size);
340    VIXL_ASSERT(veneers_target >= 0);
341    if (veneers_target >= veneer_pool_manager_.GetCheckpoint()) {
342      veneer_pool_manager_.Emit(veneers_target);
343    }
344    EmitLiteralPool(option);
345  }
346  BindHelper(&after_pools);
347  if (GetBuffer()->IsManaged()) {
348    bool grow_requested;
349    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
350    if (grow_requested) ComputeCheckpoint();
351  }
352}
353
354
355void MacroAssembler::ComputeCheckpoint() {
356  checkpoint_ = veneer_pool_manager_.GetCheckpoint();
357  if (literal_pool_manager_.GetCheckpoint() != Label::kMaxOffset) {
358    size_t veneer_max_size = veneer_pool_manager_.GetMaxSize();
359    VIXL_ASSERT(IsInt32(veneer_max_size));
360    // We must be able to generate the pool and a branch over the pool.
361    Label::Offset tmp = literal_pool_manager_.GetCheckpoint() -
362                        static_cast<Label::Offset>(veneer_max_size +
363                                                   kMaxInstructionSizeInBytes);
364    VIXL_ASSERT(tmp >= 0);
365    checkpoint_ = std::min(checkpoint_, tmp);
366  }
367  size_t buffer_size = GetBuffer()->GetCapacity();
368  VIXL_ASSERT(IsInt32(buffer_size));
369  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
370  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
371}
372
373
374void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
375                                     EmitOption option) {
376  if (literal_pool->GetSize() > 0) {
377#ifdef VIXL_DEBUG
378    for (LiteralPool::RawLiteralListIterator literal_it =
379             literal_pool->GetFirst();
380         literal_it != literal_pool->GetEnd();
381         literal_it++) {
382      RawLiteral* literal = *literal_it;
383      VIXL_ASSERT(GetCursorOffset() < literal->GetCheckpoint());
384    }
385#endif
386    Label after_literal;
387    if (option == kBranchRequired) {
388      GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
389      VIXL_ASSERT(!AllowAssembler());
390      {
391        ExactAssemblyScopeWithoutPoolsCheck
392            guard(this,
393                  kMaxInstructionSizeInBytes,
394                  ExactAssemblyScope::kMaximumSize);
395        b(&after_literal);
396      }
397    }
398    GetBuffer()->Align();
399    GetBuffer()->EnsureSpaceFor(literal_pool->GetSize());
400    for (LiteralPool::RawLiteralListIterator it = literal_pool->GetFirst();
401         it != literal_pool->GetEnd();
402         it++) {
403      PlaceHelper(*it);
404      GetBuffer()->Align();
405    }
406    if (option == kBranchRequired) BindHelper(&after_literal);
407    literal_pool->Clear();
408  }
409}
410
411
412void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
413  // 32-bit table A32:
414  // adr ip, table
415  // add ip, ip, r1, lsl 2
416  // ldr ip, [ip]
417  // jmp: add pc, pc, ip, lsl 2
418  // table:
419  // .int (case_0 - (jmp + 8)) >> 2
420  // .int (case_1 - (jmp + 8)) >> 2
421  // .int (case_2 - (jmp + 8)) >> 2
422
423  // 16-bit table T32:
424  // adr ip, table
425  // jmp: tbh ip, r1
426  // table:
427  // .short (case_0 - (jmp + 4)) >> 1
428  // .short (case_1 - (jmp + 4)) >> 1
429  // .short (case_2 - (jmp + 4)) >> 1
430  // case_0:
431  //   ...
432  //   b end_switch
433  // case_1:
434  //   ...
435  //   b end_switch
436  // ...
437  // end_switch:
438  Label jump_table;
439  UseScratchRegisterScope temps(this);
440  Register scratch = temps.Acquire();
441  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
442
443  // Jump to default if reg is not in [0, table->GetLength()[
444  Cmp(reg, table->GetLength());
445  B(ge, table->GetDefaultLabel());
446
447  Adr(scratch, &jump_table);
448  if (IsUsingA32()) {
449    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
450    switch (table->GetOffsetShift()) {
451      case 0:
452        Ldrb(scratch, MemOperand(scratch));
453        break;
454      case 1:
455        Ldrh(scratch, MemOperand(scratch));
456        break;
457      case 2:
458        Ldr(scratch, MemOperand(scratch));
459        break;
460      default:
461        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
462    }
463    // Emit whatever needs to be emitted so that the position of the branch
464    // instruction is recorded correctly.
465    uint32_t branch_location = GetCursorOffset();
466    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
467    ExactAssemblyScope scope(this,
468                             table_size + kA32InstructionSizeInBytes,
469                             ExactAssemblyScope::kMaximumSize);
470    add(pc, pc, Operand(scratch, LSL, 2));
471    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
472    bind(&jump_table);
473    GenerateSwitchTable(table, table_size);
474  } else {
475    // Thumb mode - We have tbb and tbh to do this for 8 or 16bit offsets.
476    // But for 32bit offsets, we use the same code sequence as for A32.
477    if (table->GetOffsetShift() == 2) {
478      // 32bit offsets
479      Add(scratch, scratch, Operand(reg, LSL, 2));
480      Ldr(scratch, MemOperand(scratch));
481      // Cannot use add pc, pc, r lsl 1 as this is unpredictable in T32,
482      // so do the shift beforehand.
483      Lsl(scratch, scratch, 1);
484      // Emit whatever needs to be emitted so that the position of the branch
485      // instruction is recorded correctly.
486      uint32_t branch_location = GetCursorOffset();
487      table->SetBranchLocation(branch_location +
488                               GetArchitectureStatePCOffset());
489      ExactAssemblyScope scope(this,
490                               table_size + kMaxInstructionSizeInBytes,
491                               ExactAssemblyScope::kMaximumSize);
492      add(pc, pc, scratch);
493      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
494      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
495      bind(&jump_table);
496      GenerateSwitchTable(table, table_size);
497    } else {
498      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
499                  (table->GetOffsetShift() == 1));
500      // Emit whatever needs to be emitted so that the position of the branch
501      // instruction is recorded correctly.
502      uint32_t branch_location = GetCursorOffset();
503      table->SetBranchLocation(branch_location +
504                               GetArchitectureStatePCOffset());
505      ExactAssemblyScope scope(this,
506                               table_size + kMaxInstructionSizeInBytes,
507                               ExactAssemblyScope::kMaximumSize);
508      if (table->GetOffsetShift() == 0) {
509        // 8bit offsets
510        tbb(scratch, reg);
511      } else {
512        // 16bit offsets
513        tbh(scratch, reg);
514      }
515      // tbb/tbh is a 32bit instruction
516      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
517      bind(&jump_table);
518      GenerateSwitchTable(table, table_size);
519    }
520  }
521}
522
523
524void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
525  table->BindTable(GetCursorOffset());
526  for (int i = 0; i < table_size / 4; i++) {
527    GetBuffer()->Emit32(0);
528  }
529}
530
531
532// switch/case/default : case
533// case_index is assumed to be < table->GetLength()
534// which is checked in JumpTable::Link and Table::SetPresenceBit
535void MacroAssembler::Case(JumpTableBase* table, int case_index) {
536  table->Link(this, case_index, GetCursorOffset());
537  table->SetPresenceBitForCase(case_index);
538}
539
540// switch/case/default : default
541void MacroAssembler::Default(JumpTableBase* table) {
542  Bind(table->GetDefaultLabel());
543}
544
545// switch/case/default : break
546void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
547
548// switch/case/default : finalize
549// Manage the default path, mostly. All empty offsets in the jump table
550// will point to default.
551// All values not in [0, table->GetLength()[ are already pointing here anyway.
552void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
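
// A sketch of how the switch/case helpers fit together (hypothetical caller
// code, not part of this file; `masm` and the `JumpTable<uint8_t>` usage are
// assumptions for the example):
//
//   JumpTable<uint8_t> table(3);                // Three cases, 8-bit offsets.
//   masm.Switch(r0, &table);                    // Dispatch on the value in r0.
//   masm.Case(&table, 0);  /* ... */  masm.Break(&table);
//   masm.Case(&table, 1);  /* ... */  masm.Break(&table);
//   masm.Default(&table);  /* ... */
//   masm.EndSwitch(&table);                     // Remaining offsets go to default.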
553
554void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
555                                                Register tmp,
556                                                uint32_t imm) {
557  if (IsUintN(16, imm)) {
558    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
559    mov(cond, tmp, imm & 0xffff);
560    return;
561  }
562  if (IsUsingT32()) {
563    if (ImmediateT32::IsImmediateT32(~imm)) {
564      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
565      mvn(cond, tmp, ~imm);
566      return;
567    }
568  } else {
569    if (ImmediateA32::IsImmediateA32(~imm)) {
570      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
571      mvn(cond, tmp, ~imm);
572      return;
573    }
574  }
575  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
576  mov(cond, tmp, imm & 0xffff);
577  movt(cond, tmp, imm >> 16);
578}
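
// Illustrative examples of the three strategies above:
//   0x00001234 : fits in 16 bits           -> mov  tmp, #0x1234
//   0xffffff12 : bitwise inverse encodable -> mvn  tmp, #0xed
//   0x12345678 : neither                   -> mov  tmp, #0x5678
//                                             movt tmp, #0x1234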
579
580
581void MacroAssembler::PadToMinimumBranchRange(Label* label) {
582  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
583  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
584    uint32_t location = last_reference->GetLocation();
585    if (location + k16BitT32InstructionSizeInBytes ==
586        static_cast<uint32_t>(GetCursorOffset())) {
587      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
588      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
589        VIXL_ASSERT(!InITBlock());
590        // A Cbz or a Cbnz cannot branch to the instruction which immediately
591        // follows it. If the target would otherwise be bound immediately
592        // after the Cbz or Cbnz, we insert a nop first.
593        EmitT32_16(k16BitT32NopOpcode);
594      }
595    }
596  }
597}
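
// Illustrative scenario for the padding above (hypothetical caller code):
//
//   masm.Cbz(r0, &label);  // 16-bit cbz with a forward reference to `label`.
//   masm.Bind(&label);     // The target would be the instruction immediately
//                          // after the cbz, so a nop is emitted first.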
598
599
600HARDFLOAT void PrintfTrampolineRRRR(
601    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
602  printf(format, a, b, c, d);
603}
604
605
606HARDFLOAT void PrintfTrampolineRRRD(
607    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
608  printf(format, a, b, c, d);
609}
610
611
612HARDFLOAT void PrintfTrampolineRRDR(
613    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
614  printf(format, a, b, c, d);
615}
616
617
618HARDFLOAT void PrintfTrampolineRRDD(
619    const char* format, uint32_t a, uint32_t b, double c, double d) {
620  printf(format, a, b, c, d);
621}
622
623
624HARDFLOAT void PrintfTrampolineRDRR(
625    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
626  printf(format, a, b, c, d);
627}
628
629
630HARDFLOAT void PrintfTrampolineRDRD(
631    const char* format, uint32_t a, double b, uint32_t c, double d) {
632  printf(format, a, b, c, d);
633}
634
635
636HARDFLOAT void PrintfTrampolineRDDR(
637    const char* format, uint32_t a, double b, double c, uint32_t d) {
638  printf(format, a, b, c, d);
639}
640
641
642HARDFLOAT void PrintfTrampolineRDDD(
643    const char* format, uint32_t a, double b, double c, double d) {
644  printf(format, a, b, c, d);
645}
646
647
648HARDFLOAT void PrintfTrampolineDRRR(
649    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
650  printf(format, a, b, c, d);
651}
652
653
654HARDFLOAT void PrintfTrampolineDRRD(
655    const char* format, double a, uint32_t b, uint32_t c, double d) {
656  printf(format, a, b, c, d);
657}
658
659
660HARDFLOAT void PrintfTrampolineDRDR(
661    const char* format, double a, uint32_t b, double c, uint32_t d) {
662  printf(format, a, b, c, d);
663}
664
665
666HARDFLOAT void PrintfTrampolineDRDD(
667    const char* format, double a, uint32_t b, double c, double d) {
668  printf(format, a, b, c, d);
669}
670
671
672HARDFLOAT void PrintfTrampolineDDRR(
673    const char* format, double a, double b, uint32_t c, uint32_t d) {
674  printf(format, a, b, c, d);
675}
676
677
678HARDFLOAT void PrintfTrampolineDDRD(
679    const char* format, double a, double b, uint32_t c, double d) {
680  printf(format, a, b, c, d);
681}
682
683
684HARDFLOAT void PrintfTrampolineDDDR(
685    const char* format, double a, double b, double c, uint32_t d) {
686  printf(format, a, b, c, d);
687}
688
689
690HARDFLOAT void PrintfTrampolineDDDD(
691    const char* format, double a, double b, double c, double d) {
692  printf(format, a, b, c, d);
693}
694
695
696void MacroAssembler::Printf(const char* format,
697                            CPURegister reg1,
698                            CPURegister reg2,
699                            CPURegister reg3,
700                            CPURegister reg4) {
701  if (generate_simulator_code_) {
702    PushRegister(reg4);
703    PushRegister(reg3);
704    PushRegister(reg2);
705    PushRegister(reg1);
706    Push(RegisterList(r0, r1));
707    StringLiteral* format_literal =
708        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
709    Adr(r0, format_literal);
710    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
711                    (reg2.GetType() << 4) | reg1.GetType();
712    Mov(r1, args);
713    Hvc(kPrintfCode);
714    Pop(RegisterList(r0, r1));
715    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
716               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
717    Drop(size);
718  } else {
719    // Generate on a native platform => 32 bit environment.
720    // Preserve core registers r0-r3, r12, r14
721    const uint32_t saved_registers_mask =
722        kCallerSavedRegistersMask | (1 << r5.GetCode());
723    Push(RegisterList(saved_registers_mask));
724    // Push VFP registers.
725    Vpush(Untyped64, DRegisterList(d0, 8));
726    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
727    // Find a register which has been saved and which doesn't need to be
728    // printed.
729    RegisterList available_registers(kCallerSavedRegistersMask);
730    if (reg1.GetType() == CPURegister::kRRegister) {
731      available_registers.Remove(Register(reg1.GetCode()));
732    }
733    if (reg2.GetType() == CPURegister::kRRegister) {
734      available_registers.Remove(Register(reg2.GetCode()));
735    }
736    if (reg3.GetType() == CPURegister::kRRegister) {
737      available_registers.Remove(Register(reg3.GetCode()));
738    }
739    if (reg4.GetType() == CPURegister::kRRegister) {
740      available_registers.Remove(Register(reg4.GetCode()));
741    }
742    Register tmp = available_registers.GetFirstAvailableRegister();
743    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
744    // Push the flags.
745    Mrs(tmp, APSR);
746    Push(tmp);
747    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
748    Push(tmp);
749    // Push the registers to print on the stack.
750    PushRegister(reg4);
751    PushRegister(reg3);
752    PushRegister(reg2);
753    PushRegister(reg1);
754    int core_count = 1;
755    int vfp_count = 0;
756    uint32_t printf_type = 0;
757    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
758    // Reg4 may stay on the stack if all the registers to print are core
759    // registers.
760    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
761    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
762    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
763    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
764    // Ensure that the stack is aligned on 8 bytes.
765    And(r5, sp, 0x7);
766    if (core_count == 5) {
767      // One 32 bit argument (reg4) has been left on the stack => align the
768      // stack before the argument.
770      Pop(r0);
771      Sub(sp, sp, r5);
772      Push(r0);
773    } else {
774      Sub(sp, sp, r5);
775    }
776    // Select the right trampoline depending on the arguments.
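    // Bit i of printf_type is set when argument (i + 1) is a floating-point
    // value, so e.g. printf_type == 5 (0b0101: arguments 1 and 3 are doubles)
    // selects PrintfTrampolineDRDR below.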
777    uintptr_t address;
778    switch (printf_type) {
779      case 0:
780        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
781        break;
782      case 1:
783        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
784        break;
785      case 2:
786        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
787        break;
788      case 3:
789        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
790        break;
791      case 4:
792        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
793        break;
794      case 5:
795        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
796        break;
797      case 6:
798        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
799        break;
800      case 7:
801        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
802        break;
803      case 8:
804        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
805        break;
806      case 9:
807        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
808        break;
809      case 10:
810        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
811        break;
812      case 11:
813        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
814        break;
815      case 12:
816        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
817        break;
818      case 13:
819        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
820        break;
821      case 14:
822        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
823        break;
824      case 15:
825        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
826        break;
827      default:
828        VIXL_UNREACHABLE();
829        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
830        break;
831    }
832    StringLiteral* format_literal =
833        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
834    Adr(r0, format_literal);
835    Mov(ip, Operand::From(address));
836    Blx(ip);
837    // If register reg4 was left on the stack => skip it.
838    if (core_count == 5) Drop(kRegSizeInBytes);
839    // Restore the stack as it was before alignment.
840    Add(sp, sp, r5);
841    // Restore the flags.
842    Pop(tmp);
843    Vmsr(FPSCR, tmp);
844    Pop(tmp);
845    Msr(APSR_nzcvqg, tmp);
846    // Restore the registers.
847    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
848    Vpop(Untyped64, DRegisterList(d0, 8));
849    Pop(RegisterList(saved_registers_mask));
850  }
851}
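
// A minimal usage sketch (hypothetical caller code, not part of this file;
// trailing arguments are assumed to default to NoReg):
//
//   masm.Printf("x = %d, y = %g\n", r0, d1);
//
// Core values are passed in core registers and floating-point values in S/D
// registers; at most four values can be printed per call.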
852
853
854void MacroAssembler::PushRegister(CPURegister reg) {
855  switch (reg.GetType()) {
856    case CPURegister::kNoRegister:
857      break;
858    case CPURegister::kRRegister:
859      Push(Register(reg.GetCode()));
860      break;
861    case CPURegister::kSRegister:
862      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
863      break;
864    case CPURegister::kDRegister:
865      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
866      break;
867    case CPURegister::kQRegister:
868      VIXL_UNIMPLEMENTED();
869      break;
870  }
871}
872
873
874void MacroAssembler::PreparePrintfArgument(CPURegister reg,
875                                           int* core_count,
876                                           int* vfp_count,
877                                           uint32_t* printf_type) {
878  switch (reg.GetType()) {
879    case CPURegister::kNoRegister:
880      break;
881    case CPURegister::kRRegister:
882      VIXL_ASSERT(*core_count <= 4);
883      if (*core_count < 4) Pop(Register(*core_count));
884      *core_count += 1;
885      break;
886    case CPURegister::kSRegister:
887      VIXL_ASSERT(*vfp_count < 4);
888      *printf_type |= 1 << (*core_count + *vfp_count - 1);
889      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
890      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
891      *vfp_count += 1;
892      break;
893    case CPURegister::kDRegister:
894      VIXL_ASSERT(*vfp_count < 4);
895      *printf_type |= 1 << (*core_count + *vfp_count - 1);
896      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
897      *vfp_count += 1;
898      break;
899    case CPURegister::kQRegister:
900      VIXL_UNIMPLEMENTED();
901      break;
902  }
903}
904
905
906void MacroAssembler::Delegate(InstructionType type,
907                              InstructionCondROp instruction,
908                              Condition cond,
909                              Register rn,
910                              const Operand& operand) {
911  // movt, sxtb16, teq, uxtb16
912  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
913              (type == kUxtb16));
914
915  if (type == kMovt) {
916    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.");
917  }
918
919  // This delegate only supports teq with immediates.
920  CONTEXT_SCOPE;
921  if ((type == kTeq) && operand.IsImmediate()) {
922    UseScratchRegisterScope temps(this);
923    Register scratch = temps.Acquire();
924    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
925    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
926    teq(cond, rn, scratch);
927    return;
928  }
929  Assembler::Delegate(type, instruction, cond, rn, operand);
930}
931
932
933void MacroAssembler::Delegate(InstructionType type,
934                              InstructionCondSizeROp instruction,
935                              Condition cond,
936                              EncodingSize size,
937                              Register rn,
938                              const Operand& operand) {
939  // cmn cmp mov movs mvn mvns sxtb sxth tst uxtb uxth
940  CONTEXT_SCOPE;
941  VIXL_ASSERT(size.IsBest());
942  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
943              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
944              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
945              (type == kUxtb) || (type == kUxth));
946  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
947    VIXL_ASSERT((type != kMov) || (type != kMovs));
948    InstructionCondRROp shiftop = NULL;
949    switch (operand.GetShift().GetType()) {
950      case LSL:
951        shiftop = &Assembler::lsl;
952        break;
953      case LSR:
954        shiftop = &Assembler::lsr;
955        break;
956      case ASR:
957        shiftop = &Assembler::asr;
958        break;
959      case RRX:
960        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
961        VIXL_UNREACHABLE();
962        break;
963      case ROR:
964        shiftop = &Assembler::ror;
965        break;
966      default:
967        VIXL_UNREACHABLE();
968    }
969    if (shiftop != NULL) {
970      UseScratchRegisterScope temps(this);
971      Register scratch = temps.Acquire();
972      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
973      (this->*shiftop)(cond,
974                       scratch,
975                       operand.GetBaseRegister(),
976                       operand.GetShiftRegister());
977      (this->*instruction)(cond, size, rn, scratch);
978      return;
979    }
980  }
981  if (operand.IsImmediate()) {
982    uint32_t imm = operand.GetImmediate();
983    switch (type) {
984      case kMov:
985      case kMovs:
986        if (!rn.IsPC()) {
987          // Immediate is too large, but not using PC, so handle with mov{t}.
988          HandleOutOfBoundsImmediate(cond, rn, imm);
989          if (type == kMovs) {
990            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
991            tst(cond, rn, rn);
992          }
993          return;
994        } else if (type == kMov) {
995          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
996          // Immediate is too large and using PC, so handle using a temporary
997          // register.
998          UseScratchRegisterScope temps(this);
999          Register scratch = temps.Acquire();
1000          HandleOutOfBoundsImmediate(al, scratch, imm);
1001          EnsureEmitFor(kMaxInstructionSizeInBytes);
1002          bx(cond, scratch);
1003          return;
1004        }
1005        break;
1006      case kCmn:
1007      case kCmp:
1008        if (IsUsingA32() || !rn.IsPC()) {
1009          UseScratchRegisterScope temps(this);
1010          Register scratch = temps.Acquire();
1011          HandleOutOfBoundsImmediate(cond, scratch, imm);
1012          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1013          (this->*instruction)(cond, size, rn, scratch);
1014          return;
1015        }
1016        break;
1017      case kMvn:
1018      case kMvns:
1019        if (!rn.IsPC()) {
1020          UseScratchRegisterScope temps(this);
1021          Register scratch = temps.Acquire();
1022          HandleOutOfBoundsImmediate(cond, scratch, imm);
1023          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1024          (this->*instruction)(cond, size, rn, scratch);
1025          return;
1026        }
1027        break;
1028      case kTst:
1029        if (IsUsingA32() || !rn.IsPC()) {
1030          UseScratchRegisterScope temps(this);
1031          Register scratch = temps.Acquire();
1032          HandleOutOfBoundsImmediate(cond, scratch, imm);
1033          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1034          (this->*instruction)(cond, size, rn, scratch);
1035          return;
1036        }
1037        break;
1038      default:  // kSxtb, kSxth, kUxtb, kUxth
1039        break;
1040    }
1041  }
1042  Assembler::Delegate(type, instruction, cond, size, rn, operand);
1043}
1044
1045
1046void MacroAssembler::Delegate(InstructionType type,
1047                              InstructionCondRROp instruction,
1048                              Condition cond,
1049                              Register rd,
1050                              Register rn,
1051                              const Operand& operand) {
1052  // orn orns pkhbt pkhtb rsc rscs sxtab sxtab16 sxtah uxtab uxtab16 uxtah
1053
1054  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
1055      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
1056      (type == kPkhbt) || (type == kPkhtb)) {
1057    UnimplementedDelegate(type);
1058    return;
1059  }
1060
1061  // This delegate only handles the following instructions.
1062  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
1063              (type == kRscs));
1064  CONTEXT_SCOPE;
1065
1066  // T32 does not support register shifted register operands, emulate it.
1067  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1068    InstructionCondRROp shiftop = NULL;
1069    switch (operand.GetShift().GetType()) {
1070      case LSL:
1071        shiftop = &Assembler::lsl;
1072        break;
1073      case LSR:
1074        shiftop = &Assembler::lsr;
1075        break;
1076      case ASR:
1077        shiftop = &Assembler::asr;
1078        break;
1079      case RRX:
1080        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1081        VIXL_UNREACHABLE();
1082        break;
1083      case ROR:
1084        shiftop = &Assembler::ror;
1085        break;
1086      default:
1087        VIXL_UNREACHABLE();
1088    }
1089    if (shiftop != NULL) {
1090      UseScratchRegisterScope temps(this);
1091      Register rm = operand.GetBaseRegister();
1092      Register rs = operand.GetShiftRegister();
1093      // Try to use rd as a scratch register. We can do this if it aliases rs or
1094      // rm (because we read them in the first instruction), but not rn.
1095      if (!rd.Is(rn)) temps.Include(rd);
1096      Register scratch = temps.Acquire();
1097      // TODO: The scope length was measured empirically. We should analyse the
1098      // worst-case size and add targeted tests.
1099      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1100      (this->*shiftop)(cond, scratch, rm, rs);
1101      (this->*instruction)(cond, rd, rn, scratch);
1102      return;
1103    }
1104  }
1105
1106  // T32 does not have an Rsc instruction, so negate the lhs input and turn it
1107  // into an Adc. Adc and Rsc are equivalent using a bitwise NOT:
1108  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
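  // (Rsc computes operand - rn - !C, which equals operand + NOT(rn) + C, i.e.
  // exactly what Adc computes once rn has been bitwise inverted.)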
1109  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1110    // The RegisterShiftRegister case should have been handled above.
1111    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1112    UseScratchRegisterScope temps(this);
1113    // Try to use rd as a scratch register. We can do this if it aliases rn
1114    // (because we read it in the first instruction), but not rm.
1115    temps.Include(rd);
1116    temps.Exclude(operand);
1117    Register negated_rn = temps.Acquire();
1118    {
1119      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1120      mvn(cond, negated_rn, rn);
1121    }
1122    if (type == kRsc) {
1123      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1124      adc(cond, rd, negated_rn, operand);
1125      return;
1126    }
1127    // TODO: We shouldn't have to specify how much space the next instruction
1128    // needs.
1129    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1130    adcs(cond, rd, negated_rn, operand);
1131    return;
1132  }
1133
1134  // A32 does not have an Orn instruction, so negate the rhs input and turn it
1135  // into an Orr.
1136  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1137    // TODO: orn r0, r1, imm -> orr r0, r1, NOT(imm) if doable
1138    //  mvn r0, r2
1139    //  orr r0, r1, r0
1140    Register scratch;
1141    UseScratchRegisterScope temps(this);
1142    // Try to use rd as a scratch register. We can do this if it aliases a
1143    // register used by the operand (read in the first instruction), but not rn.
1144    if (!rd.Is(rn)) temps.Include(rd);
1145    scratch = temps.Acquire();
1146    {
1147      // TODO: We shouldn't have to specify how much space the next instruction
1148      // needs.
1149      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1150      mvn(cond, scratch, operand);
1151    }
1152    if (type == kOrns) {
1153      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1154      orrs(cond, rd, rn, scratch);
1155      return;
1156    }
1157    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1158    orr(cond, rd, rn, scratch);
1159    return;
1160  }
1161  if (operand.IsImmediate()) {
1162    int32_t imm = operand.GetSignedImmediate();
1163
1164    // If the immediate can be encoded when inverted, turn Orn into Orr.
1165    // Otherwise rely on HandleOutOfBoundsImmediate to materialise the
1166    // immediate into a scratch register.
1167    if (IsUsingT32() && ((type == kOrn) || (type == kOrns)) &&
1168        ImmediateT32::IsImmediateT32(~imm)) {
1169      VIXL_ASSERT((type == kOrn) || (type == kOrns));
1170      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1171      switch (type) {
1172        case kOrn:
1173          orr(cond, rd, rn, ~imm);
1174          return;
1175        case kOrns:
1176          orrs(cond, rd, rn, ~imm);
1177          return;
1178        default:
1179          VIXL_UNREACHABLE();
1180          break;
1181      }
1182    } else {
1183      UseScratchRegisterScope temps(this);
1184      // Allow using the destination as a scratch register if possible.
1185      if (!rd.Is(rn)) temps.Include(rd);
1186      Register scratch = temps.Acquire();
1187      HandleOutOfBoundsImmediate(cond, scratch, imm);
1188      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1189      (this->*instruction)(cond, rd, rn, scratch);
1190      return;
1191    }
1192  }
1193  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1194}
1195
1196
1197void MacroAssembler::Delegate(InstructionType type,
1198                              InstructionCondSizeRROp instruction,
1199                              Condition cond,
1200                              EncodingSize size,
1201                              Register rd,
1202                              Register rn,
1203                              const Operand& operand) {
1204  // adc adcs add adds and_ ands asr asrs bic bics eor eors lsl lsls lsr lsrs
1205  // orr orrs ror rors rsb rsbs sbc sbcs sub subs
1206
1207  VIXL_ASSERT(
1208      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1209      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1210      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1211      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1212      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1213      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1214      (type == kSub) || (type == kSubs));
1215
1216  CONTEXT_SCOPE;
1217  VIXL_ASSERT(size.IsBest());
1218  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1219    InstructionCondRROp shiftop = NULL;
1220    switch (operand.GetShift().GetType()) {
1221      case LSL:
1222        shiftop = &Assembler::lsl;
1223        break;
1224      case LSR:
1225        shiftop = &Assembler::lsr;
1226        break;
1227      case ASR:
1228        shiftop = &Assembler::asr;
1229        break;
1230      case RRX:
1231        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1232        VIXL_UNREACHABLE();
1233        break;
1234      case ROR:
1235        shiftop = &Assembler::ror;
1236        break;
1237      default:
1238        VIXL_UNREACHABLE();
1239    }
1240    if (shiftop != NULL) {
1241      UseScratchRegisterScope temps(this);
1242      Register rm = operand.GetBaseRegister();
1243      Register rs = operand.GetShiftRegister();
1244      // Try to use rd as a scratch register. We can do this if it aliases rs or
1245      // rm (because we read them in the first instruction), but not rn.
1246      if (!rd.Is(rn)) temps.Include(rd);
1247      Register scratch = temps.Acquire();
1248      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1249      (this->*shiftop)(cond, scratch, rm, rs);
1250      (this->*instruction)(cond, size, rd, rn, scratch);
1251      return;
1252    }
1253  }
1254  if (operand.IsImmediate()) {
1255    int32_t imm = operand.GetSignedImmediate();
1256    if (ImmediateT32::IsImmediateT32(~imm)) {
1257      if (IsUsingT32()) {
1258        switch (type) {
1259          case kOrr:
1260            orn(cond, rd, rn, ~imm);
1261            return;
1262          case kOrrs:
1263            orns(cond, rd, rn, ~imm);
1264            return;
1265          default:
1266            break;
1267        }
1268      }
1269    }
1270    if (imm < 0) {
1271      InstructionCondSizeRROp asmcb = NULL;
1272      // Add and sub are equivalent using an arithmetic negation:
1273      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1274      // Add and sub with carry are equivalent using a bitwise NOT:
1275      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
1276      switch (type) {
1277        case kAdd:
1278          asmcb = &Assembler::sub;
1279          imm = -imm;
1280          break;
1281        case kAdds:
1282          asmcb = &Assembler::subs;
1283          imm = -imm;
1284          break;
1285        case kSub:
1286          asmcb = &Assembler::add;
1287          imm = -imm;
1288          break;
1289        case kSubs:
1290          asmcb = &Assembler::adds;
1291          imm = -imm;
1292          break;
1293        case kAdc:
1294          asmcb = &Assembler::sbc;
1295          imm = ~imm;
1296          break;
1297        case kAdcs:
1298          asmcb = &Assembler::sbcs;
1299          imm = ~imm;
1300          break;
1301        case kSbc:
1302          asmcb = &Assembler::adc;
1303          imm = ~imm;
1304          break;
1305        case kSbcs:
1306          asmcb = &Assembler::adcs;
1307          imm = ~imm;
1308          break;
1309        default:
1310          break;
1311      }
1312      if (asmcb != NULL) {
1313        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1314        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1315        return;
1316      }
1317    }
1318    UseScratchRegisterScope temps(this);
1319    // Allow using the destination as a scratch register if possible.
1320    if (!rd.Is(rn)) temps.Include(rd);
1321    Register scratch = temps.Acquire();
1322    // TODO: The scope length was measured empirically. We should analyse the
1323    // worst-case size and add targeted tests.
1324    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1325    mov(cond, scratch, operand.GetImmediate());
1326    (this->*instruction)(cond, size, rd, rn, scratch);
1327    return;
1328  }
1329  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1330}
1331
1332
1333void MacroAssembler::Delegate(InstructionType type,
1334                              InstructionRL instruction,
1335                              Register rn,
1336                              Label* label) {
1337  // cbz cbnz
1338  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1339
1340  CONTEXT_SCOPE;
1341  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1342  if (IsUsingA32()) {
1343    if (type == kCbz) {
1344      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1345    } else {
1346      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1347    }
1348  } else if (rn.IsLow()) {
1349    switch (type) {
1350      case kCbnz: {
1351        Label done;
1352        cbz(rn, &done);
1353        b(label);
1354        Bind(&done);
1355        return;
1356      }
1357      case kCbz: {
1358        Label done;
1359        cbnz(rn, &done);
1360        b(label);
1361        Bind(&done);
1362        return;
1363      }
1364      default:
1365        break;
1366    }
1367  }
1368  Assembler::Delegate(type, instruction, rn, label);
1369}
1370
1371
1372template <typename T>
1373static inline bool IsI64BitPattern(T imm) {
1374  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1375    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1376  }
1377  return true;
1378}
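
// For example, IsI64BitPattern(0xff0000ff) is true (every byte is either 0x00
// or 0xff), while IsI64BitPattern(0xff0000ab) is false.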
1379
1380
1381template <typename T>
1382static inline bool IsI8BitPattern(T imm) {
1383  uint8_t imm8 = imm & 0xff;
1384  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1385    imm >>= 8;
1386    if ((imm & 0xff) != imm8) return false;
1387  }
1388  return true;
1389}
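
// For example, IsI8BitPattern(0xabababab) is true (all bytes are equal), which
// is what lets vmov.i32 with such an immediate be emitted as vmov.i8.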
1390
1391
1392static inline bool CanBeInverted(uint32_t imm32) {
1393  uint32_t fill8 = 0;
1394
1395  if ((imm32 & 0xffffff00) == 0xffffff00) {
1396    //    11111111 11111111 11111111 abcdefgh
1397    return true;
1398  }
1399  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1400    fill8 = imm32 & 0xff;
1401    imm32 >>= 8;
1402    if ((imm32 >> 8) == 0xffff) {
1403      //    11111111 11111111 abcdefgh 00000000
1404      // or 11111111 11111111 abcdefgh 11111111
1405      return true;
1406    }
1407    if ((imm32 & 0xff) == fill8) {
1408      imm32 >>= 8;
1409      if ((imm32 >> 8) == 0xff) {
1410        //    11111111 abcdefgh 00000000 00000000
1411        // or 11111111 abcdefgh 11111111 11111111
1412        return true;
1413      }
1414      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1415        //    abcdefgh 11111111 11111111 11111111
1416        return true;
1417      }
1418    }
1419  }
1420  return false;
1421}
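
// For example, CanBeInverted(0xffab0000) is true (pattern
// "11111111 abcdefgh 00000000 00000000"), so the vmov delegate below can emit
// vmvn.i32 with 0x0054ffff instead of building the value in a scratch register.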
1422
1423
1424template <typename RES, typename T>
1425static inline RES replicate(T imm) {
1426  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1427              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1428  RES res = imm;
1429  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1430    res = (res << (sizeof(T) * 8)) | imm;
1431  }
1432  return res;
1433}
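
// For example, with a uint32_t argument, replicate<uint64_t>(0xff0000ff) yields
// 0xff0000ffff0000ff; this is how an I32 value matching IsI64BitPattern is
// widened for vmov.i64.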
1434
1435
1436void MacroAssembler::Delegate(InstructionType type,
1437                              InstructionCondDtSSop instruction,
1438                              Condition cond,
1439                              DataType dt,
1440                              SRegister rd,
1441                              const SOperand& operand) {
1442  CONTEXT_SCOPE;
1443  if (type == kVmov) {
1444    if (operand.IsImmediate() && dt.Is(F32)) {
1445      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1446      if (neon_imm.CanConvert<float>()) {
1447        // movw ip, imm16
1448        // movt ip, imm16
1449        // vmov s0, ip
1450        UseScratchRegisterScope temps(this);
1451        Register scratch = temps.Acquire();
1452        float f = neon_imm.GetImmediate<float>();
1453        // TODO: The scope length was measured empirically. We should analyse
1454        // the worst-case size and add targeted tests.
1456        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1457        mov(cond, scratch, FloatToRawbits(f));
1458        vmov(cond, rd, scratch);
1459        return;
1460      }
1461    }
1462  }
1463  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1464}
1465
1466
1467void MacroAssembler::Delegate(InstructionType type,
1468                              InstructionCondDtDDop instruction,
1469                              Condition cond,
1470                              DataType dt,
1471                              DRegister rd,
1472                              const DOperand& operand) {
1473  CONTEXT_SCOPE;
1474  if (type == kVmov) {
1475    if (operand.IsImmediate()) {
1476      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1477      switch (dt.GetValue()) {
1478        case I32:
1479          if (neon_imm.CanConvert<uint32_t>()) {
1480            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1481            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1482            if (IsI8BitPattern(imm)) {
1483              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1484              vmov(cond, I8, rd, imm & 0xff);
1485              return;
1486            }
1487            // vmov.i32 d0, 0xff0000ff will translate into
1488            // vmov.i64 d0, 0xff0000ffff0000ff
1489            if (IsI64BitPattern(imm)) {
1490              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1491              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1492              return;
1493            }
1494            // vmov.i32 d0, 0xffab0000 will translate into
1495            // vmvn.i32 d0, 0x0054ffff
1496            if (cond.Is(al) && CanBeInverted(imm)) {
1497              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1498              vmvn(I32, rd, ~imm);
1499              return;
1500            }
1501          }
1502          break;
1503        case I16:
1504          if (neon_imm.CanConvert<uint16_t>()) {
1505            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1506            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1507            if (IsI8BitPattern(imm)) {
1508              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1509              vmov(cond, I8, rd, imm & 0xff);
1510              return;
1511            }
1512          }
1513          break;
1514        case I64:
1515          if (neon_imm.CanConvert<uint64_t>()) {
1516            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1517            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1518            if (IsI8BitPattern(imm)) {
1519              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1520              vmov(cond, I8, rd, imm & 0xff);
1521              return;
1522            }
1523            // mov ip, lo(imm64)
1524            // vdup d0, ip
1525            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to
1526            // be preserved.
1527            {
1528              UseScratchRegisterScope temps(this);
1529              Register scratch = temps.Acquire();
1530              {
1531                // TODO: The scope length was measured empirically. We should
1532                // analyse the worst-case size and add targeted tests.
1534                CodeBufferCheckScope scope(this,
1535                                           2 * kMaxInstructionSizeInBytes);
1536                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1537              }
1538              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1539              vdup(cond, Untyped32, rd, scratch);
1540            }
1541            // mov ip, hi(imm64)
1542            // vmov d0[1], ip
1543            {
1544              UseScratchRegisterScope temps(this);
1545              Register scratch = temps.Acquire();
1546              {
1547                // TODO: The scope length was measured empirically. We
1548                // should analyse the worst-case size and add targeted
1549                // tests.
1550                CodeBufferCheckScope scope(this,
1551                                           2 * kMaxInstructionSizeInBytes);
1552                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1553              }
1554              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1555              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1556            }
1557            return;
1558          }
1559          break;
1560        default:
1561          break;
1562      }
1563      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1564      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1565        // mov ip, imm32
1566        // vdup.16 d0, ip
1567        UseScratchRegisterScope temps(this);
1568        Register scratch = temps.Acquire();
1569        {
1570          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1571          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1572        }
1573        DataTypeValue vdup_dt = Untyped32;
1574        switch (dt.GetValue()) {
1575          case I16:
1576            vdup_dt = Untyped16;
1577            break;
1578          case I32:
1579            vdup_dt = Untyped32;
1580            break;
1581          default:
1582            VIXL_UNREACHABLE();
1583        }
1584        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1585        vdup(cond, vdup_dt, rd, scratch);
1586        return;
1587      }
1588      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1589        float f = neon_imm.GetImmediate<float>();
1590        // Punt to vmov.i32
1591        // TODO: The scope length was guessed based on the double case below. We
1592        // should analyse the worst-case size and add targeted tests.
1593        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1594        vmov(cond, I32, rd, FloatToRawbits(f));
1595        return;
1596      }
1597      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1598        // Punt to vmov.i64
1599        double d = neon_imm.GetImmediate<double>();
1600        // TODO: The scope length was measured empirically. We should
1601        // analyse the worst-case size and add targeted
1602        // tests.
1603        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1604        vmov(cond, I64, rd, DoubleToRawbits(d));
1605        return;
1606      }
1607    }
1608  }
1609  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1610}
1611
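// Editorial usage sketch (not part of the library sources). Assuming a
// MacroAssembler `masm` and the generated Vmov macro, the delegate above lets
// callers pass D-register immediates that have no direct NEON encoding:
//
//   masm.Vmov(I32, d0, 0xabababab);  // every byte equal
//                                    //   -> vmov.i8 d0, #0xab
//   masm.Vmov(I32, d1, 0xff0000ff);  // each byte 0x00 or 0xff
//                                    //   -> vmov.i64 d1, #0xff0000ffff0000ff
//   masm.Vmov(I32, d2, 0xffab0000);  // invertible
//                                    //   -> vmvn.i32 d2, #0x0054ffff
//
// Other 64-bit values are materialised through a core scratch register with
// the mov/vdup/vmov-lane sequence implemented above.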
1612
1613void MacroAssembler::Delegate(InstructionType type,
1614                              InstructionCondDtQQop instruction,
1615                              Condition cond,
1616                              DataType dt,
1617                              QRegister rd,
1618                              const QOperand& operand) {
1619  CONTEXT_SCOPE;
1620  if (type == kVmov) {
1621    if (operand.IsImmediate()) {
1622      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1623      switch (dt.GetValue()) {
1624        case I32:
1625          if (neon_imm.CanConvert<uint32_t>()) {
1626            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1627            // vmov.i32 q0, 0xabababab will translate into vmov.i8 q0, 0xab
1628            if (IsI8BitPattern(imm)) {
1629              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1630              vmov(cond, I8, rd, imm & 0xff);
1631              return;
1632            }
1633            // vmov.i32 q0, 0xff0000ff will translate into
1634            // vmov.i64 q0, 0xff0000ffff0000ff
1635            if (IsI64BitPattern(imm)) {
1636              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1637              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1638              return;
1639            }
1640            // vmov.i32 q0, 0xffab0000 will translate into
1641            // vmvn.i32 q0, 0x0054ffff
1642            if (CanBeInverted(imm)) {
1643              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1644              vmvn(cond, I32, rd, ~imm);
1645              return;
1646            }
1647          }
1648          break;
1649        case I16:
1650          if (neon_imm.CanConvert<uint16_t>()) {
1651            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1652            // vmov.i16 q0, 0xabab will translate into vmov.i8 q0, 0xab
1653            if (IsI8BitPattern(imm)) {
1654              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1655              vmov(cond, I8, rd, imm & 0xff);
1656              return;
1657            }
1658          }
1659          break;
1660        case I64:
1661          if (neon_imm.CanConvert<uint64_t>()) {
1662            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1663            // vmov.i64 q0, -1 will translate into vmov.i8 q0, 0xff
1664            if (IsI8BitPattern(imm)) {
1665              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1666              vmov(cond, I8, rd, imm & 0xff);
1667              return;
1668            }
1669            // mov ip, lo(imm64)
1670            // vdup q0, ip
1671            // vdup is preferred to 'vmov d0[0]' as the other lanes of q0 do
1672            // not need to be preserved
1673            {
1674              UseScratchRegisterScope temps(this);
1675              Register scratch = temps.Acquire();
1676              {
1677                CodeBufferCheckScope scope(this,
1678                                           2 * kMaxInstructionSizeInBytes);
1679                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1680              }
1681              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1682              vdup(cond, Untyped32, rd, scratch);
1683            }
1684            // mov ip, hi(imm64)
1685            // vmov.i32 d0[1], ip
1686            // vmov d1, d0
1687            {
1688              UseScratchRegisterScope temps(this);
1689              Register scratch = temps.Acquire();
1690              {
1691                CodeBufferCheckScope scope(this,
1692                                           2 * kMaxInstructionSizeInBytes);
1693                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1694              }
1695              {
1696                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1697                vmov(cond,
1698                     Untyped32,
1699                     DRegisterLane(rd.GetLowDRegister(), 1),
1700                     scratch);
1701              }
1702              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1703              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1704            }
1705            return;
1706          }
1707          break;
1708        default:
1709          break;
1710      }
1711      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1712      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1713        // mov ip, imm32
1714        // vdup.16 q0, ip
1715        UseScratchRegisterScope temps(this);
1716        Register scratch = temps.Acquire();
1717        {
1718          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1719          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1720        }
1721        DataTypeValue vdup_dt = Untyped32;
1722        switch (dt.GetValue()) {
1723          case I16:
1724            vdup_dt = Untyped16;
1725            break;
1726          case I32:
1727            vdup_dt = Untyped32;
1728            break;
1729          default:
1730            VIXL_UNREACHABLE();
1731        }
1732        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1733        vdup(cond, vdup_dt, rd, scratch);
1734        return;
1735      }
1736      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1737        // Punt to vmov.i32
1738        float f = neon_imm.GetImmediate<float>();
1739        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1740        vmov(cond, I32, rd, FloatToRawbits(f));
1741        return;
1742      }
1743      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1744        // Use vmov to create the double in the low D register, then duplicate
1745        // it into the high D register.
1746        double d = neon_imm.GetImmediate<double>();
1747        CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
1748        vmov(cond, F64, rd.GetLowDRegister(), d);
1749        vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1750        return;
1751      }
1752    }
1753  }
1754  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1755}
1756
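// Editorial usage sketch (same assumptions as the D-register case above): the
// Q-register delegate applies the same immediate patterns, and a generic
// 64-bit or double immediate is built in the low D register and then copied
// into the high one:
//
//   masm.Vmov(I32, q0, 0xabababab);  // -> vmov.i8 q0, #0xab
//   masm.Vmov(F64, q1, 1.0);         // -> vmov.f64 d2, #1.0
//                                    //    vmov.f64 d3, d2
//
// (q1 aliases d2 and d3, so duplicating the low D register fills the whole
// Q register with the same double.)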
1757
1758void MacroAssembler::Delegate(InstructionType type,
1759                              InstructionCondSizeRMop instruction,
1760                              Condition cond,
1761                              EncodingSize size,
1762                              Register rd,
1763                              const MemOperand& operand) {
1764  // ldr ldrb ldrh ldrsb ldrsh str strb strh
1765  CONTEXT_SCOPE;
1766  VIXL_ASSERT(size.IsBest());
1767  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
1768              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
1769              (type == kStrb) || (type == kStrh));
1770  if (operand.IsImmediate()) {
1771    const Register& rn = operand.GetBaseRegister();
1772    AddrMode addrmode = operand.GetAddrMode();
1773    int32_t offset = operand.GetOffsetImmediate();
1774    uint32_t mask = 0;
1775    switch (type) {
1776      case kLdr:
1777      case kLdrb:
1778      case kStr:
1779      case kStrb:
1780        if (IsUsingA32() || (addrmode == Offset)) {
1781          mask = 0xfff;
1782        } else {
1783          mask = 0xff;
1784        }
1785        break;
1786      case kLdrsb:
1787      case kLdrh:
1788      case kLdrsh:
1789      case kStrh:
1790        if (IsUsingT32() && (addrmode == Offset)) {
1791          mask = 0xfff;
1792        } else {
1793          mask = 0xff;
1794        }
1795        break;
1796      default:
1797        VIXL_UNREACHABLE();
1798        return;
1799    }
1800    bool negative;
1801    // Try to maximize the offset used by the MemOperand (load_store_offset).
1802    // Add or subtract the part which can't be used by the MemOperand
1803    // (add_sub_offset).
1804    int32_t add_sub_offset;
1805    int32_t load_store_offset;
1806    load_store_offset = offset & mask;
1807    if (offset >= 0) {
1808      negative = false;
1809      add_sub_offset = offset & ~mask;
1810    } else {
1811      negative = true;
1812      add_sub_offset = -offset & ~mask;
1813      if (load_store_offset > 0) add_sub_offset += mask + 1;
1814    }
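    // Worked example (editorial): with mask == 0xfff and offset == 0x12345,
    // load_store_offset == 0x345 and add_sub_offset == 0x12000, so an Offset
    // access becomes "add scratch, rn, #0x12000 ; ldr rd, [scratch, #0x345]".
    // With offset == -0x12345, load_store_offset == 0xcbb (the low bits of
    // the two's complement) and add_sub_offset == 0x13000, giving
    // "sub scratch, rn, #0x13000 ; ldr rd, [scratch, #0xcbb]", which again
    // addresses rn - 0x12345.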
1815    switch (addrmode) {
1816      case PreIndex:
1817        // Avoid the unpredictable case 'str r0, [r0, imm]!'
1818        if (!rn.Is(rd)) {
1819          // Pre-Indexed case:
1820          // ldr r0, [r1, 12345]! will translate into
1821          //   add r1, r1, 12345
1822          //   ldr r0, [r1]
1823          {
1824            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1825            if (negative) {
1826              sub(cond, rn, rn, add_sub_offset);
1827            } else {
1828              add(cond, rn, rn, add_sub_offset);
1829            }
1830          }
1831          {
1832            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1833            (this->*instruction)(cond,
1834                                 size,
1835                                 rd,
1836                                 MemOperand(rn, load_store_offset, PreIndex));
1837          }
1838          return;
1839        }
1840        break;
1841      case Offset: {
1842        UseScratchRegisterScope temps(this);
1843        // Allow using the destination as a scratch register if possible.
1844        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1845            !rd.Is(rn)) {
1846          temps.Include(rd);
1847        }
1848        Register scratch = temps.Acquire();
1849        // Offset case:
1850        // ldr r0, [r1, 12345] will translate into
1851        //   add r0, r1, 12345
1852        //   ldr r0, [r0]
1853        {
1854          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1855          if (negative) {
1856            sub(cond, scratch, rn, add_sub_offset);
1857          } else {
1858            add(cond, scratch, rn, add_sub_offset);
1859          }
1860        }
1861        {
1862          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1863          (this->*instruction)(cond,
1864                               size,
1865                               rd,
1866                               MemOperand(scratch, load_store_offset));
1867        }
1868        return;
1869      }
1870      case PostIndex:
1871        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1872        if (!rn.Is(rd)) {
1873          // Post-indexed case:
1874          // ldr r0, [r1], imm32 will translate into
1875          //   ldr r0, [r1]
1876          //   movw ip, imm32 & 0xffff
1877          //   movt ip, imm32 >> 16
1878          //   add r1, r1, ip
1879          {
1880            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1881            (this->*instruction)(cond,
1882                                 size,
1883                                 rd,
1884                                 MemOperand(rn, load_store_offset, PostIndex));
1885          }
1886          {
1887            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1888            if (negative) {
1889              sub(cond, rn, rn, add_sub_offset);
1890            } else {
1891              add(cond, rn, rn, add_sub_offset);
1892            }
1893          }
1894          return;
1895        }
1896        break;
1897    }
1898  } else if (operand.IsPlainRegister()) {
1899    const Register& rn = operand.GetBaseRegister();
1900    AddrMode addrmode = operand.GetAddrMode();
1901    const Register& rm = operand.GetOffsetRegister();
1902    if (rm.IsPC()) {
1903      VIXL_ABORT_WITH_MSG(
1904          "The MacroAssembler does not convert loads and stores with a PC "
1905          "offset register.\n");
1906    }
1907    if (rn.IsPC()) {
1908      if (addrmode == Offset) {
1909        if (IsUsingT32()) {
1910          VIXL_ABORT_WITH_MSG(
1911              "The MacroAssembler does not convert loads and stores with a PC "
1912              "base register for T32.\n");
1913        }
1914      } else {
1915        VIXL_ABORT_WITH_MSG(
1916            "The MacroAssembler does not convert loads and stores with a PC "
1917            "base register in pre-index or post-index mode.\n");
1918      }
1919    }
1920    switch (addrmode) {
1921      case PreIndex:
1922        // Avoid the unpredictable case 'str r0, [r0, imm]!'
1923        if (!rn.Is(rd)) {
1924          // Pre-Indexed case:
1925          // ldr r0, [r1, r2]! will translate into
1926          //   add r1, r1, r2
1927          //   ldr r0, [r1]
1928          {
1929            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1930            if (operand.GetSign().IsPlus()) {
1931              add(cond, rn, rn, rm);
1932            } else {
1933              sub(cond, rn, rn, rm);
1934            }
1935          }
1936          {
1937            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1938            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1939          }
1940          return;
1941        }
1942        break;
1943      case Offset: {
1944        UseScratchRegisterScope temps(this);
1945        // Allow using the destination as a scratch register if this is not a
1946        // store.
1947        // Avoid using PC as a temporary as this has side-effects.
1948        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
1949            !rd.IsPC()) {
1950          temps.Include(rd);
1951        }
1952        Register scratch = temps.Acquire();
1953        // Offset case:
1954        // ldr r0, [r1, r2] will translate into
1955        //   add r0, r1, r2
1956        //   ldr r0, [r0]
1957        {
1958          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1959          if (operand.GetSign().IsPlus()) {
1960            add(cond, scratch, rn, rm);
1961          } else {
1962            sub(cond, scratch, rn, rm);
1963          }
1964        }
1965        {
1966          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1967          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
1968        }
1969        return;
1970      }
1971      case PostIndex:
1972        // Avoid the unpredictable case 'ldr r0, [r0], imm'
1973        if (!rn.Is(rd)) {
1974          // Post-indexed case:
1975          // ldr r0, [r1], r2 will translate into
1976          //   ldr r0, [r1]
1977          //   add r1, r1, r2
1978          {
1979            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1980            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
1981          }
1982          {
1983            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1984            if (operand.GetSign().IsPlus()) {
1985              add(cond, rn, rn, rm);
1986            } else {
1987              sub(cond, rn, rn, rm);
1988            }
1989          }
1990          return;
1991        }
1992        break;
1993    }
1994  }
1995  Assembler::Delegate(type, instruction, cond, size, rd, operand);
1996}
1997
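// Editorial usage sketch (assuming the generated Ldr/Str macros): this
// delegate is what makes loads and stores with out-of-range immediates or
// register offsets work transparently, e.g.:
//
//   masm.Ldr(r0, MemOperand(r1, 0x12345));
//     // -> add r0, r1, #0x12000 ; ldr r0, [r0, #0x345]
//     //    (the destination may be reused as the scratch register)
//   masm.Str(r2, MemOperand(r3, 0x12345, PreIndex));
//     // -> add r3, r3, #0x12000 ; str r2, [r3, #0x345]!
//
// The exact split depends on the instruction set (A32 vs T32) and on the
// offset range of the underlying encoding.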
1998
1999void MacroAssembler::Delegate(InstructionType type,
2000                              InstructionCondRRMop instruction,
2001                              Condition cond,
2002                              Register rt,
2003                              Register rt2,
2004                              const MemOperand& operand) {
2005  // ldaexd, ldrd, ldrexd, stlex, stlexb, stlexh, strd, strex, strexb, strexh
2006
2007  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
2008      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
2009      (type == kStrexb) || (type == kStrexh)) {
2010    UnimplementedDelegate(type);
2011    return;
2012  }
2013
2014  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
2015
2016  CONTEXT_SCOPE;
2017
2018  // TODO: Should we allow these cases?
2019  if (IsUsingA32()) {
2020    // The first register needs to be even.
2021    if ((rt.GetCode() & 1) != 0) {
2022      UnimplementedDelegate(type);
2023      return;
2024    }
2025    // Registers need to be adjacent.
2026    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
2027      UnimplementedDelegate(type);
2028      return;
2029    }
2030    // LDRD lr, pc, [...] is not allowed.
2031    if (rt.Is(lr)) {
2032      UnimplementedDelegate(type);
2033      return;
2034    }
2035  }
2036
2037  if (operand.IsImmediate()) {
2038    const Register& rn = operand.GetBaseRegister();
2039    AddrMode addrmode = operand.GetAddrMode();
2040    int32_t offset = operand.GetOffsetImmediate();
2041    switch (addrmode) {
2042      case PreIndex: {
2043        // Allow using the destinations as scratch registers if possible.
2044        UseScratchRegisterScope temps(this);
2045        if (type == kLdrd) {
2046          if (!rt.Is(rn)) temps.Include(rt);
2047          if (!rt2.Is(rn)) temps.Include(rt2);
2048        }
2049
2050        // Pre-Indexed case:
2051        // ldrd r0, r1, [r2, 12345]! will translate into
2052        //   add r2, 12345
2053        //   ldrd r0, r1, [r2]
2054        {
2055          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2056          add(cond, rn, rn, offset);
2057        }
2058        {
2059          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2060          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2061        }
2062        return;
2063      }
2064      case Offset: {
2065        UseScratchRegisterScope temps(this);
2066        // Allow using the destinations as scratch registers if possible.
2067        if (type == kLdrd) {
2068          if (!rt.Is(rn)) temps.Include(rt);
2069          if (!rt2.Is(rn)) temps.Include(rt2);
2070        }
2071        Register scratch = temps.Acquire();
2072        // Offset case:
2073        // ldrd r0, r1, [r2, 12345] will translate into
2074        //   add r0, r2, 12345
2075        //   ldrd r0, r1, [r0]
2076        {
2077          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2078          add(cond, scratch, rn, offset);
2079        }
2080        {
2081          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2082          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2083        }
2084        return;
2085      }
2086      case PostIndex:
2087        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2088        if (!rn.Is(rt) && !rn.Is(rt2)) {
2089          // Post-indexed case:
2090          // ldrd r0, r1, [r2], imm32 will translate into
2091          //   ldrd r0, r1, [r2]
2092          //   movw ip, imm32 & 0xffff
2093          //   movt ip, imm32 >> 16
2094          //   add r2, ip
2095          {
2096            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2097            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2098          }
2099          {
2100            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2101            add(cond, rn, rn, offset);
2102          }
2103          return;
2104        }
2105        break;
2106    }
2107  }
2108  if (operand.IsPlainRegister()) {
2109    const Register& rn = operand.GetBaseRegister();
2110    const Register& rm = operand.GetOffsetRegister();
2111    AddrMode addrmode = operand.GetAddrMode();
2112    switch (addrmode) {
2113      case PreIndex:
2114        // ldrd r0, r1, [r2, r3]! will translate into
2115        //   add r2, r3
2116        //   ldrd r0, r1, [r2]
2117        {
2118          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2119          if (operand.GetSign().IsPlus()) {
2120            add(cond, rn, rn, rm);
2121          } else {
2122            sub(cond, rn, rn, rm);
2123          }
2124        }
2125        {
2126          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2127          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2128        }
2129        return;
2130      case PostIndex:
2131        // ldrd r0, r1, [r2], r3 will translate into
2132        //   ldrd r0, r1, [r2]
2133        //   add r2, r3
2134        {
2135          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2136          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2137        }
2138        {
2139          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2140          if (operand.GetSign().IsPlus()) {
2141            add(cond, rn, rn, rm);
2142          } else {
2143            sub(cond, rn, rn, rm);
2144          }
2145        }
2146        return;
2147      case Offset: {
2148        UseScratchRegisterScope temps(this);
2149        // Allow using the destinations as scratch registers if possible.
2150        if (type == kLdrd) {
2151          if (!rt.Is(rn)) temps.Include(rt);
2152          if (!rt2.Is(rn)) temps.Include(rt2);
2153        }
2154        Register scratch = temps.Acquire();
2155        // Offset case:
2156        // ldrd r0, r1, [r2, r3] will translate into
2157        //   add r0, r2, r3
2158        //   ldrd r0, r1, [r0]
2159        {
2160          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2161          if (operand.GetSign().IsPlus()) {
2162            add(cond, scratch, rn, rm);
2163          } else {
2164            sub(cond, scratch, rn, rm);
2165          }
2166        }
2167        {
2168          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2169          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2170        }
2171        return;
2172      }
2173    }
2174  }
2175  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2176}
2177
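// Editorial usage sketch (assuming the generated Ldrd/Strd macros and a
// register-offset MemOperand constructor): large or register offsets are
// folded into an add/sub before the access, e.g.:
//
//   masm.Ldrd(r0, r1, MemOperand(r2, 0x12345));
//     // -> add r0, r2, #0x12345 ; ldrd r0, r1, [r0]
//   masm.Strd(r0, r1, MemOperand(r2, r3, PostIndex));
//     // -> strd r0, r1, [r2] ; add r2, r2, r3
//
// On A32 the register pair must start on an even register, be adjacent, and
// the first register must not be lr, as checked above; other pairs hit
// UnimplementedDelegate.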
2178
2179void MacroAssembler::Delegate(InstructionType type,
2180                              InstructionCondDtSMop instruction,
2181                              Condition cond,
2182                              DataType dt,
2183                              SRegister rd,
2184                              const MemOperand& operand) {
2185  // vldr.32 vstr.32
2186  CONTEXT_SCOPE;
2187  if (operand.IsImmediate()) {
2188    const Register& rn = operand.GetBaseRegister();
2189    AddrMode addrmode = operand.GetAddrMode();
2190    int32_t offset = operand.GetOffsetImmediate();
2191    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2192                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2193    if (rn.IsPC()) {
2194      VIXL_ABORT_WITH_MSG(
2195          "The MacroAssembler does not convert vldr or vstr with a PC base "
2196          "register.\n");
2197    }
2198    switch (addrmode) {
2199      case PreIndex:
2200        // Pre-Indexed case:
2201        // vldr.32 s0, [r1, 12345]! will translate into
2202        //   add r1, 12345
2203        //   vldr.32 s0, [r1]
2204        if (offset != 0) {
2205          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2206          add(cond, rn, rn, offset);
2207        }
2208        {
2209          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2210          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2211        }
2212        return;
2213      case Offset: {
2214        UseScratchRegisterScope temps(this);
2215        Register scratch = temps.Acquire();
2216        // Offset case:
2217        // vldr.32 s0, [r1, 12345] will translate into
2218        //   add ip, r1, 12345
2219        //   vldr.32 s0, [ip]
2220        {
2221          VIXL_ASSERT(offset != 0);
2222          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2223          add(cond, scratch, rn, offset);
2224        }
2225        {
2226          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2227          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2228        }
2229        return;
2230      }
2231      case PostIndex:
2232        // Post-indexed case:
2233        // vldr.32 s0, [r1], imm32 will translate into
2234        //   vldr.32 s0, [r1]
2235        //   movw ip, imm32 & 0xffff
2236        //   movt ip, imm32 >> 16
2237        //   add r1, ip
2238        {
2239          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2240          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2241        }
2242        if (offset != 0) {
2243          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2244          add(cond, rn, rn, offset);
2245        }
2246        return;
2247    }
2248  }
2249  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2250}
2251
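// Editorial usage sketch (assuming the generated Vldr/Vstr macros): vldr and
// vstr only encode small, word-aligned immediate offsets, so anything else is
// rebased through a scratch register, e.g.:
//
//   masm.Vldr(s0, MemOperand(r1, 0x12344));
//     // -> add ip, r1, #0x12344 ; vldr.32 s0, [ip]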
2252
2253void MacroAssembler::Delegate(InstructionType type,
2254                              InstructionCondDtDMop instruction,
2255                              Condition cond,
2256                              DataType dt,
2257                              DRegister rd,
2258                              const MemOperand& operand) {
2259  // vldr.64 vstr.64
2260  CONTEXT_SCOPE;
2261  if (operand.IsImmediate()) {
2262    const Register& rn = operand.GetBaseRegister();
2263    AddrMode addrmode = operand.GetAddrMode();
2264    int32_t offset = operand.GetOffsetImmediate();
2265    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2266                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2267    if (rn.IsPC()) {
2268      VIXL_ABORT_WITH_MSG(
2269          "The MacroAssembler does not convert vldr or vstr with a PC base "
2270          "register.\n");
2271    }
2272    switch (addrmode) {
2273      case PreIndex:
2274        // Pre-Indexed case:
2275        // vldr.64 d0, [r1, 12345]! will translate into
2276        //   add r1, 12345
2277        //   vldr.64 d0, [r1]
2278        if (offset != 0) {
2279          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2280          add(cond, rn, rn, offset);
2281        }
2282        {
2283          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2284          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2285        }
2286        return;
2287      case Offset: {
2288        UseScratchRegisterScope temps(this);
2289        Register scratch = temps.Acquire();
2290        // Offset case:
2291        // vldr.64 d0, [r1, 12345] will translate into
2292        //   add ip, r1, 12345
2293        //   vldr.64 d0, [ip]
2294        {
2295          VIXL_ASSERT(offset != 0);
2296          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2297          add(cond, scratch, rn, offset);
2298        }
2299        {
2300          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2301          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2302        }
2303        return;
2304      }
2305      case PostIndex:
2306        // Post-indexed case:
2307        // vldr.64 d0, [r1], imm32 will translate into
2308        //   vldr.64 d0, [r1]
2309        //   movw ip, imm32 & 0xffff
2310        //   movt ip, imm32 >> 16
2311        //   add r1, ip
2312        {
2313          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2314          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2315        }
2316        if (offset != 0) {
2317          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2318          add(cond, rn, rn, offset);
2319        }
2320        return;
2321    }
2322  }
2323  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2324}
2325
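// Editorial usage sketch (same assumptions as the single-precision case):
//
//   masm.Vldr(d0, MemOperand(r1, 0x12344, PostIndex));
//     // -> vldr.64 d0, [r1] ; add r1, r1, #0x12344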
2326
2327void MacroAssembler::Delegate(InstructionType type,
2328                              InstructionCondMsrOp instruction,
2329                              Condition cond,
2330                              MaskedSpecialRegister spec_reg,
2331                              const Operand& operand) {
2332  USE(type);
2333  VIXL_ASSERT(type == kMsr);
2334  if (operand.IsImmediate()) {
2335    UseScratchRegisterScope temps(this);
2336    Register scratch = temps.Acquire();
2337    {
2338      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2339      mov(cond, scratch, operand);
2340    }
2341    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2342    msr(cond, spec_reg, scratch);
2343    return;
2344  }
2345  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2346}
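// Editorial usage sketch (assuming the generated Msr macro and the
// APSR_nzcvq masked special register): when the immediate form of msr is not
// available (for example on T32), the value is moved into a scratch register
// first, e.g.:
//
//   masm.Msr(APSR_nzcvq, 0xf0000000);
//     // -> mov ip, #0xf0000000 ; msr APSR_nzcvq, ip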
2347
2348#undef CONTEXT_SCOPE
2349#undef TOSTRING
2350#undef STRINGIFY
2351
2352// Start of generated code.
2353// End of generated code.
2354}  // namespace aarch32
2355}  // namespace vixl
2356