macro-assembler-aarch32.cc revision 4a30c5d68ebbc271d6d876d828ffa96db53d8d7c
1// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//   * Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//   * Redistributions in binary form must reproduce the above copyright
10//     notice, this list of conditions and the following disclaimer in the
11//     documentation and/or other materials provided with the distribution.
12//   * Neither the name of ARM Limited nor the names of its contributors may
13//     be used to endorse or promote products derived from this software
14//     without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26// POSSIBILITY OF SUCH DAMAGE.
27
28#include "aarch32/macro-assembler-aarch32.h"
29
30#define STRINGIFY(x) #x
31#define TOSTRING(x) STRINGIFY(x)
32
33#define CONTEXT_SCOPE \
34  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))
35
36namespace vixl {
37namespace aarch32 {
38
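// The `UseScratchRegisterScope` methods below save the macro-assembler's
// scratch register lists on `Open()` and restore them on `Close()`, so that
// registers acquired inside the scope are automatically returned to the pool.
// A typical (illustrative) usage pattern looks like:
//
//   UseScratchRegisterScope temps(&masm);
//   Register scratch = temps.Acquire();  // Borrow a core scratch register.
//   ...                                  // Use `scratch` freely here.
//   // `scratch` is released again when `temps` goes out of scope.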
39void UseScratchRegisterScope::Open(MacroAssembler* masm) {
40  VIXL_ASSERT((available_ == NULL) && (available_vfp_ == NULL));
41  available_ = masm->GetScratchRegisterList();
42  old_available_ = available_->GetList();
43  available_vfp_ = masm->GetScratchVRegisterList();
44  old_available_vfp_ = available_vfp_->GetList();
45}
46
47
48void UseScratchRegisterScope::Close() {
49  if (available_ != NULL) {
50    available_->SetList(old_available_);
51    available_ = NULL;
52  }
53  if (available_vfp_ != NULL) {
54    available_vfp_->SetList(old_available_vfp_);
55    available_vfp_ = NULL;
56  }
57}
58
59
60bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
61  VIXL_ASSERT(available_ != NULL);
62  VIXL_ASSERT(reg.IsValid());
63  return available_->Includes(reg);
64}
65
66
67bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
68  VIXL_ASSERT(available_vfp_ != NULL);
69  VIXL_ASSERT(reg.IsValid());
70  return available_vfp_->IncludesAllOf(reg);
71}
72
73
74Register UseScratchRegisterScope::Acquire() {
75  VIXL_ASSERT(available_ != NULL);
76  VIXL_CHECK(!available_->IsEmpty());
77  Register reg = available_->GetFirstAvailableRegister();
78  available_->Remove(reg);
79  return reg;
80}
81
82
83VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
84  switch (size_in_bits) {
85    case kSRegSizeInBits:
86      return AcquireS();
87    case kDRegSizeInBits:
88      return AcquireD();
89    case kQRegSizeInBits:
90      return AcquireQ();
91    default:
92      VIXL_UNREACHABLE();
93      return NoVReg;
94  }
95}
96
97
98QRegister UseScratchRegisterScope::AcquireQ() {
99  VIXL_ASSERT(available_vfp_ != NULL);
100  VIXL_CHECK(!available_vfp_->IsEmpty());
101  QRegister reg = available_vfp_->GetFirstAvailableQRegister();
102  available_vfp_->Remove(reg);
103  return reg;
104}
105
106
107DRegister UseScratchRegisterScope::AcquireD() {
108  VIXL_ASSERT(available_vfp_ != NULL);
109  VIXL_CHECK(!available_vfp_->IsEmpty());
110  DRegister reg = available_vfp_->GetFirstAvailableDRegister();
111  available_vfp_->Remove(reg);
112  return reg;
113}
114
115
116SRegister UseScratchRegisterScope::AcquireS() {
117  VIXL_ASSERT(available_vfp_ != NULL);
118  VIXL_CHECK(!available_vfp_->IsEmpty());
119  SRegister reg = available_vfp_->GetFirstAvailableSRegister();
120  available_vfp_->Remove(reg);
121  return reg;
122}
123
124
125void UseScratchRegisterScope::Release(const Register& reg) {
126  VIXL_ASSERT(available_ != NULL);
127  VIXL_ASSERT(reg.IsValid());
128  VIXL_ASSERT(!available_->Includes(reg));
129  available_->Combine(reg);
130}
131
132
133void UseScratchRegisterScope::Release(const VRegister& reg) {
134  VIXL_ASSERT(available_vfp_ != NULL);
135  VIXL_ASSERT(reg.IsValid());
136  VIXL_ASSERT(!available_vfp_->IncludesAliasOf(reg));
137  available_vfp_->Combine(reg);
138}
139
140
141void UseScratchRegisterScope::Include(const RegisterList& list) {
142  VIXL_ASSERT(available_ != NULL);
143  RegisterList excluded_registers(sp, lr, pc);
144  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
145  available_->SetList(available_->GetList() | mask);
146}
147
148
149void UseScratchRegisterScope::Include(const VRegisterList& list) {
150  VIXL_ASSERT(available_vfp_ != NULL);
151  available_vfp_->SetList(available_vfp_->GetList() | list.GetList());
152}
153
154
155void UseScratchRegisterScope::Exclude(const RegisterList& list) {
156  VIXL_ASSERT(available_ != NULL);
157  available_->SetList(available_->GetList() & ~list.GetList());
158}
159
160
161void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
162  VIXL_ASSERT(available_vfp_ != NULL);
163  available_vfp_->SetList(available_vfp_->GetList() & ~list.GetList());
164}
165
166
167void UseScratchRegisterScope::Exclude(const Operand& operand) {
168  if (operand.IsImmediateShiftedRegister()) {
169    Exclude(operand.GetBaseRegister());
170  } else if (operand.IsRegisterShiftedRegister()) {
171    Exclude(operand.GetBaseRegister(), operand.GetShiftRegister());
172  } else {
173    VIXL_ASSERT(operand.IsImmediate());
174  }
175}
176
177
178void UseScratchRegisterScope::ExcludeAll() {
179  if (available_ != NULL) {
180    available_->SetList(0);
181  }
182  if (available_vfp_ != NULL) {
183    available_vfp_->SetList(0);
184  }
185}
186
187
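// Register a label with the veneer pool manager. Labels are tracked in two
// lists: `near_labels_` for references with a short range (cbz/cbnz) and
// `far_labels_` for the others, each list with its own emission checkpoint.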
188void VeneerPoolManager::AddLabel(Label* label) {
189  if (last_label_reference_offset_ != 0) {
190    // If the pool grows faster than the instruction stream, we must adjust
191    // the checkpoint to compensate. The veneer pool entries take 32 bits, so
192    // this can only occur when two consecutive 16-bit instructions add veneer
193    // pool entries.
194    // This is typically the case for cbz and cbnz (other forward branches
195    // have a 32 bit variant which is always used).
196    if (last_label_reference_offset_ + 2 * k16BitT32InstructionSizeInBytes ==
197        static_cast<uint32_t>(masm_->GetCursorOffset())) {
198      // We found two 16-bit forward branches generated one after the other.
199      // That means that the pool will grow by one 32-bit branch while
200      // the cursor offset moves forward by only one 16-bit instruction.
201      // Update the cbz/cbnz checkpoint to manage the difference.
202      near_checkpoint_ -=
203          k32BitT32InstructionSizeInBytes - k16BitT32InstructionSizeInBytes;
204    }
205  }
206  Label::ForwardReference& back = label->GetBackForwardRef();
207  VIXL_ASSERT(back.GetMaxForwardDistance() >= kCbzCbnzRange);
208  if (!label->IsInVeneerPool()) {
209    if (back.GetMaxForwardDistance() <= kNearLabelRange) {
210      near_labels_.push_back(label);
211      label->SetVeneerPoolManager(this, true);
212    } else {
213      far_labels_.push_back(label);
214      label->SetVeneerPoolManager(this, false);
215    }
216  } else if (back.GetMaxForwardDistance() <= kNearLabelRange) {
217    if (!label->IsNear()) {
218      far_labels_.remove(label);
219      near_labels_.push_back(label);
220      label->SetVeneerPoolManager(this, true);
221    }
222  }
223
224  back.SetIsBranch();
225  last_label_reference_offset_ = back.GetLocation();
226  label->UpdateCheckpoint();
227  Label::Offset tmp = label->GetCheckpoint();
228  if (label->IsNear()) {
229    if (near_checkpoint_ > tmp) near_checkpoint_ = tmp;
230  } else {
231    if (far_checkpoint_ > tmp) far_checkpoint_ = tmp;
232  }
233  // Always recompute the global checkpoint, as adding veneers shortens the
234  // literals' checkpoint.
235  masm_->ComputeCheckpoint();
236}
237
238
239void VeneerPoolManager::RemoveLabel(Label* label) {
240  label->ClearVeneerPoolManager();
241  std::list<Label*>& list = label->IsNear() ? near_labels_ : far_labels_;
242  Label::Offset* checkpoint_reference =
243      label->IsNear() ? &near_checkpoint_ : &far_checkpoint_;
244  if (label->GetCheckpoint() == *checkpoint_reference) {
245    // We have to compute checkpoint again.
246    *checkpoint_reference = Label::kMaxOffset;
247    for (std::list<Label*>::iterator it = list.begin(); it != list.end();) {
248      if (*it == label) {
249        it = list.erase(it);
250      } else {
251        *checkpoint_reference =
252            std::min(*checkpoint_reference, (*it)->GetCheckpoint());
253        ++it;
254      }
255    }
256    masm_->ComputeCheckpoint();
257  } else {
258    // We only have to remove the label from the list.
259    list.remove(label);
260  }
261}
262
263
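// Emit a veneer (an unconditional branch to `label`) at the current position.
// Every forward branch to `label` whose checkpoint is at or before
// `emitted_target` is redirected to the veneer; the remaining references keep
// pointing at the original label, and the label's checkpoint is recomputed
// from them.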
264void VeneerPoolManager::EmitLabel(Label* label, Label::Offset emitted_target) {
265  // Define the veneer.
266  Label veneer;
267  masm_->Bind(&veneer);
268  Label::Offset label_checkpoint = Label::kMaxOffset;
269  // Check all uses of this label.
270  for (Label::ForwardRefList::iterator ref = label->GetFirstForwardRef();
271       ref != label->GetEndForwardRef();) {
272    if (ref->IsBranch()) {
273      if (ref->GetCheckpoint() <= emitted_target) {
274        // Use the veneer.
275        masm_->EncodeLabelFor(*ref, &veneer);
276        ref = label->Erase(ref);
277      } else {
278        // Don't use the veneer => update checkpoint.
279        label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
280        ++ref;
281      }
282    } else {
283      ++ref;
284    }
285  }
286  label->SetCheckpoint(label_checkpoint);
287  if (label->IsNear()) {
288    near_checkpoint_ = std::min(near_checkpoint_, label_checkpoint);
289  } else {
290    far_checkpoint_ = std::min(far_checkpoint_, label_checkpoint);
291  }
292  // Generate the veneer.
293  masm_->B(label);
294}
295
296
297void VeneerPoolManager::Emit(Label::Offset target) {
298  VIXL_ASSERT(!IsBlocked());
299  // Sort the labels by checkpoint to avoid a veneer going out of range.
300  // The near labels are always sorted, as the near list only holds one
301  // range.
302  far_labels_.sort(Label::CompareLabels);
303  // To avoid too many veneers, generate veneers which will be necessary soon.
304  static const size_t kVeneerEmissionMargin = 1 * KBytes;
305  // To avoid too many veneers, let the generated veneers be reused by other
306  // references which are not too far away.
307  static const size_t kVeneerEmittedMargin = 2 * KBytes;
308  Label::Offset emitted_target = target + kVeneerEmittedMargin;
309  target += kVeneerEmissionMargin;
310  // Reset the checkpoints. They will be computed again in the loop.
311  near_checkpoint_ = Label::kMaxOffset;
312  far_checkpoint_ = Label::kMaxOffset;
313  for (std::list<Label*>::iterator it = near_labels_.begin();
314       it != near_labels_.end();) {
315    Label* label = *it;
316    // Move the label from the near list to the far list, because the veneer
317    // emitted for it will be a far (unconditional) branch.
318    // The label is pushed to the end of the far list. The list remains sorted
319    // because the unconditional jump has the biggest range. Even if the items
320    // at the end of the list were not sorted, it would not be a problem: they
321    // won't be used by this generation (their range will be greater than
322    // kVeneerEmittedMargin).
323    it = near_labels_.erase(it);
324    far_labels_.push_back(label);
325    label->SetVeneerPoolManager(this, false);
326    EmitLabel(label, emitted_target);
327  }
328  for (std::list<Label*>::iterator it = far_labels_.begin();
329       it != far_labels_.end();) {
330    // The labels are sorted. As soon as a veneer is not needed, we can stop.
331    if ((*it)->GetCheckpoint() > target) {
332      far_checkpoint_ = std::min(far_checkpoint_, (*it)->GetCheckpoint());
333      break;
334    }
335    // Even if this label has no further uses, we can keep it in the list:
336    // the next "B" to it would simply add it back.
337    EmitLabel(*it, emitted_target);
338    ++it;
339  }
340#ifdef VIXL_DEBUG
341  for (std::list<Label*>::iterator it = near_labels_.begin();
342       it != near_labels_.end();
343       ++it) {
344    VIXL_ASSERT((*it)->GetCheckpoint() >= near_checkpoint_);
345  }
346  for (std::list<Label*>::iterator it = far_labels_.begin();
347       it != far_labels_.end();
348       ++it) {
349    VIXL_ASSERT((*it)->GetCheckpoint() >= far_checkpoint_);
350  }
351#endif
352  masm_->ComputeCheckpoint();
353}
354
355
356// We use a subclass to access the protected `ExactAssemblyScope` constructor
357// giving us control over the pools, and make the constructor private to limit
358// usage to code paths emitting pools.
359class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
360 private:
361  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm,
362                                      size_t size,
363                                      SizePolicy size_policy = kExactSize)
364      : ExactAssemblyScope(masm,
365                           size,
366                           size_policy,
367                           ExactAssemblyScope::kIgnorePools) {}
368
369  friend void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
370                                              EmitOption option);
371
372  // TODO: `PerformEnsureEmit` is `private`, so we have to make the
373  // `MacroAssembler` a friend.
374  friend class MacroAssembler;
375};
376
377
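// Called when the emission checkpoint is reached. Depending on which pool is
// about to go out of range, this emits the veneer pool, the literal pool, or
// both, then (for managed buffers) makes sure there is room for the next
// `size` bytes of code.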
378void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
379  if (!doing_veneer_pool_generation_) {
380    EmitOption option = kBranchRequired;
381    Label after_pools;
382    Label::Offset literal_target = GetTargetForLiteralEmission();
383    VIXL_ASSERT(literal_target >= 0);
384    bool generate_veneers = target > veneer_pool_manager_.GetCheckpoint();
385    if (target > literal_target) {
386      // We will generate the literal pool. Generate all the veneers which
387      // would become out of range.
388      size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize() +
389                                 kMaxInstructionSizeInBytes;
390      VIXL_ASSERT(IsInt32(literal_pool_size));
391      Label::Offset veneers_target =
392          AlignUp(target + static_cast<Label::Offset>(literal_pool_size), 4);
393      VIXL_ASSERT(veneers_target >= 0);
394      if (veneers_target > veneer_pool_manager_.GetCheckpoint()) {
395        generate_veneers = true;
396      }
397    }
398    if (generate_veneers) {
399      {
400        ExactAssemblyScopeWithoutPoolsCheck
401            guard(this,
402                  kMaxInstructionSizeInBytes,
403                  ExactAssemblyScope::kMaximumSize);
404        b(&after_pools);
405      }
406      doing_veneer_pool_generation_ = true;
407      veneer_pool_manager_.Emit(target);
408      doing_veneer_pool_generation_ = false;
409      option = kNoBranchRequired;
410    }
411    // Check if the macro-assembler's internal literal pool should be emitted
412    // to avoid any overflow. If we already generated the veneers, we can
413    // emit the pool (the branch is already done).
414    if ((target > literal_target) || (option == kNoBranchRequired)) {
415      EmitLiteralPool(option);
416    }
417    BindHelper(&after_pools);
418  }
419  if (GetBuffer()->IsManaged()) {
420    bool grow_requested;
421    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
422    if (grow_requested) ComputeCheckpoint();
423  }
424}
425
426
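// The checkpoint is the cursor offset at which pool emission must be
// considered again: the minimum of the veneer pool checkpoint and the literal
// pool target, aligned down to 4 bytes, further capped by the buffer capacity.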
427void MacroAssembler::ComputeCheckpoint() {
428  checkpoint_ = AlignDown(std::min(veneer_pool_manager_.GetCheckpoint(),
429                                   GetTargetForLiteralEmission()),
430                          4);
431  size_t buffer_size = GetBuffer()->GetCapacity();
432  VIXL_ASSERT(IsInt32(buffer_size));
433  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
434  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
435}
436
437
438void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
439                                     EmitOption option) {
440  if (literal_pool->GetSize() > 0) {
441#ifdef VIXL_DEBUG
442    for (LiteralPool::RawLiteralListIterator literal_it =
443             literal_pool->GetFirst();
444         literal_it != literal_pool->GetEnd();
445         literal_it++) {
446      RawLiteral* literal = *literal_it;
447      VIXL_ASSERT(GetCursorOffset() < literal->GetCheckpoint());
448    }
449#endif
450    Label after_literal;
451    if (option == kBranchRequired) {
452      GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
453      VIXL_ASSERT(!AllowAssembler());
454      {
455        ExactAssemblyScopeWithoutPoolsCheck
456            guard(this,
457                  kMaxInstructionSizeInBytes,
458                  ExactAssemblyScope::kMaximumSize);
459        b(&after_literal);
460      }
461    }
462    GetBuffer()->Align();
463    GetBuffer()->EnsureSpaceFor(literal_pool->GetSize());
464    for (LiteralPool::RawLiteralListIterator it = literal_pool->GetFirst();
465         it != literal_pool->GetEnd();
466         it++) {
467      PlaceHelper(*it);
468      GetBuffer()->Align();
469    }
470    if (option == kBranchRequired) BindHelper(&after_literal);
471    literal_pool->Clear();
472  }
473}
474
475
476void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
477  // 32-bit table A32:
478  // adr ip, table
479// add ip, ip, r1, lsl 2
480  // ldr ip, [ip]
481  // jmp: add pc, pc, ip, lsl 2
482  // table:
483  // .int (case_0 - (jmp + 8)) >> 2
484  // .int (case_1 - (jmp + 8)) >> 2
485  // .int (case_2 - (jmp + 8)) >> 2
486
487  // 16-bit table T32:
488  // adr ip, table
489  // jmp: tbh ip, r1
490  // table:
491  // .short (case_0 - (jmp + 4)) >> 1
492  // .short (case_1 - (jmp + 4)) >> 1
493  // .short (case_2 - (jmp + 4)) >> 1
494  // case_0:
495  //   ...
496  //   b end_switch
497  // case_1:
498  //   ...
499  //   b end_switch
500  // ...
501  // end_switch:
502  Label jump_table;
503  UseScratchRegisterScope temps(this);
504  Register scratch = temps.Acquire();
505  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);
506
507  // Jump to default if reg is not in [0, table->GetLength()[
508  Cmp(reg, table->GetLength());
509  B(ge, table->GetDefaultLabel());
510
511  Adr(scratch, &jump_table);
512  if (IsUsingA32()) {
513    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
514    switch (table->GetOffsetShift()) {
515      case 0:
516        Ldrb(scratch, MemOperand(scratch));
517        break;
518      case 1:
519        Ldrh(scratch, MemOperand(scratch));
520        break;
521      case 2:
522        Ldr(scratch, MemOperand(scratch));
523        break;
524      default:
525        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
526    }
527    // Emit whatever needs to be emitted if we want to
528    // correctly record the position of the branch instruction
529    uint32_t branch_location = GetCursorOffset();
530    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
531    ExactAssemblyScope scope(this,
532                             table_size + kA32InstructionSizeInBytes,
533                             ExactAssemblyScope::kMaximumSize);
534    add(pc, pc, Operand(scratch, LSL, 2));
535    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
536    bind(&jump_table);
537    GenerateSwitchTable(table, table_size);
538  } else {
539    // Thumb mode - We have tbb and tbh to do this for 8-bit or 16-bit offsets.
540    // But for 32-bit offsets, we use the same code sequence as for A32.
541    if (table->GetOffsetShift() == 2) {
542      // 32bit offsets
543      Add(scratch, scratch, Operand(reg, LSL, 2));
544      Ldr(scratch, MemOperand(scratch));
545      // Cannot use add pc, pc, r, lsl 1 as this is unpredictable in T32,
546      // so do the shift beforehand.
547      Lsl(scratch, scratch, 1);
548      // Emit whatever needs to be emitted if we want to
549      // correctly record the position of the branch instruction
550      uint32_t branch_location = GetCursorOffset();
551      table->SetBranchLocation(branch_location +
552                               GetArchitectureStatePCOffset());
553      ExactAssemblyScope scope(this,
554                               table_size + kMaxInstructionSizeInBytes,
555                               ExactAssemblyScope::kMaximumSize);
556      add(pc, pc, scratch);
557      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
558      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
559      bind(&jump_table);
560      GenerateSwitchTable(table, table_size);
561    } else {
562      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
563                  (table->GetOffsetShift() == 1));
564      // Emit whatever needs to be emitted if we want to
565      // correctly record the position of the branch instruction
566      uint32_t branch_location = GetCursorOffset();
567      table->SetBranchLocation(branch_location +
568                               GetArchitectureStatePCOffset());
569      ExactAssemblyScope scope(this,
570                               table_size + kMaxInstructionSizeInBytes,
571                               ExactAssemblyScope::kMaximumSize);
572      if (table->GetOffsetShift() == 0) {
573        // 8bit offsets
574        tbb(scratch, reg);
575      } else {
576        // 16bit offsets
577        tbh(scratch, reg);
578      }
579      // tbb/tbh is a 32bit instruction
580      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
581      bind(&jump_table);
582      GenerateSwitchTable(table, table_size);
583    }
584  }
585}
586
587
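// Reserve zero-initialised space for the jump table entries. The entries are
// expected to be filled in later, as the cases are linked (Case()) and the
// table is finalized (EndSwitch()).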
588void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
589  table->BindTable(GetCursorOffset());
590  for (int i = 0; i < table_size / 4; i++) {
591    GetBuffer()->Emit32(0);
592  }
593}
594
595
596// switch/case/default : case
597// case_index is assumed to be < table->GetLength()
598// which is checked in JumpTable::Link and Table::SetPresenceBit
599void MacroAssembler::Case(JumpTableBase* table, int case_index) {
600  table->Link(this, case_index, GetCursorOffset());
601  table->SetPresenceBitForCase(case_index);
602}
603
604// switch/case/default : default
605void MacroAssembler::Default(JumpTableBase* table) {
606  Bind(table->GetDefaultLabel());
607}
608
609// switch/case/default : break
610void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }
611
612// switch/case/default : finalize
613// Manage the default path, mostly. All empty offsets in the jump table
614// will point to default.
615// All values not in [0, table->GetLength()[ are already pointing here anyway.
616void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
617
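// Materialise an arbitrary 32-bit immediate into `tmp` when it cannot be
// encoded directly: use a single 16-bit `mov` when the value fits in 16 bits,
// a `mvn` when the inverted value is an encodable modified immediate, and a
// `mov`/`movt` pair otherwise (e.g. 0x12345678 becomes
// `mov tmp, #0x5678; movt tmp, #0x1234`).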
618void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
619                                                Register tmp,
620                                                uint32_t imm) {
621  if (IsUintN(16, imm)) {
622    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
623    mov(cond, tmp, imm & 0xffff);
624    return;
625  }
626  if (IsUsingT32()) {
627    if (ImmediateT32::IsImmediateT32(~imm)) {
628      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
629      mvn(cond, tmp, ~imm);
630      return;
631    }
632  } else {
633    if (ImmediateA32::IsImmediateA32(~imm)) {
634      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
635      mvn(cond, tmp, ~imm);
636      return;
637    }
638  }
639  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
640  mov(cond, tmp, imm & 0xffff);
641  movt(cond, tmp, imm >> 16);
642}
643
644
645void MacroAssembler::PadToMinimumBranchRange(Label* label) {
646  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
647  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
648    uint32_t location = last_reference->GetLocation();
649    if (location + k16BitT32InstructionSizeInBytes ==
650        static_cast<uint32_t>(GetCursorOffset())) {
651      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
652      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
653        VIXL_ASSERT(!InITBlock());
654        // A Cbz or a Cbnz cannot branch to the instruction immediately
655        // following it (the smallest encodable forward target is 4 bytes
656        // ahead). If the target is right after the Cbz or Cbnz, insert a nop.
657        EmitT32_16(k16BitT32NopOpcode);
658      }
659    }
660  }
661}
662
663
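// Split `offset` into a part which fits in the addressing mode of the load or
// store (`offset & extra_offset_mask`) and a remainder which is added into
// `scratch` beforehand. For example, with a 0xfff mask, an offset of 0x12345
// would (roughly) become `add scratch, base, #0x12000` followed by an access
// at `[scratch, #0x345]`. PC bases get extra care below.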
664MemOperand MacroAssembler::MemOperandComputationHelper(
665    Condition cond,
666    Register scratch,
667    Register base,
668    uint32_t offset,
669    uint32_t extra_offset_mask) {
670  VIXL_ASSERT(!AliasesAvailableScratchRegister(scratch));
671  VIXL_ASSERT(!AliasesAvailableScratchRegister(base));
672  VIXL_ASSERT(allow_macro_instructions_);
673  VIXL_ASSERT(OutsideITBlock());
674
675  // Check for the simple pass-through case.
676  if ((offset & extra_offset_mask) == offset) return MemOperand(base, offset);
677
678  MacroEmissionCheckScope guard(this);
679  ITScope it_scope(this, &cond);
680
681  uint32_t load_store_offset = offset & extra_offset_mask;
682  uint32_t add_offset = offset & ~extra_offset_mask;
683
684  if (base.IsPC()) {
685    // Special handling for PC bases. We must read the PC in the first
686    // instruction (and only in that instruction), and we must also take care to
687    // keep the same address calculation as loads and stores. For T32, that
688    // means using something like ADR, which uses AlignDown(PC, 4).
689
690    // We don't handle positive offsets from PC because the intention is not
691    // clear; does the user expect the offset from the current
692    // GetCursorOffset(), or to allow a certain amount of space after the
693    // instruction?
694    VIXL_ASSERT((offset & 0x80000000) != 0);
695    if (IsUsingT32()) {
696      // T32: make the first instruction "SUB (immediate, from PC)" -- an alias
697      // of ADR -- to get behaviour like loads and stores. This ADR can handle
698      // at least as much offset as the load_store_offset so it can replace it.
699
700      uint32_t sub_pc_offset = (-offset) & 0xfff;
701      load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
702      add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;
703
704      ExactAssemblyScope scope(this, k32BitT32InstructionSizeInBytes);
705      sub(cond, scratch, base, sub_pc_offset);
706
707      if (add_offset == 0) return MemOperand(scratch, load_store_offset);
708
709      // The rest of the offset can be generated in the usual way.
710      base = scratch;
711    }
712    // A32 can use any SUB instruction, so we don't have to do anything special
713    // here except to ensure that we read the PC first.
714  }
715
716  add(cond, scratch, base, add_offset);
717  return MemOperand(scratch, load_store_offset);
718}
719
720
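// Return the mask of immediate-offset bits directly supported by the given
// load/store encoding, so callers such as MemOperandComputationHelper know
// how much of an offset can be left in the memory operand (e.g. 0xfff for a
// 12-bit ldr offset, 0x3fc for vldr's 8-bit offset scaled by 4).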
721uint32_t MacroAssembler::GetOffsetMask(InstructionType type,
722                                       AddrMode addrmode) {
723  switch (type) {
724    case kLdr:
725    case kLdrb:
726    case kStr:
727    case kStrb:
728      if (IsUsingA32() || (addrmode == Offset)) {
729        return 0xfff;
730      } else {
731        return 0xff;
732      }
733    case kLdrsb:
734    case kLdrh:
735    case kLdrsh:
736    case kStrh:
737      if (IsUsingT32() && (addrmode == Offset)) {
738        return 0xfff;
739      } else {
740        return 0xff;
741      }
742    case kVldr:
743    case kVstr:
744      return 0x3fc;
745    case kLdrd:
746    case kStrd:
747      if (IsUsingA32()) {
748        return 0xff;
749      } else {
750        return 0x3fc;
751      }
752    default:
753      VIXL_UNREACHABLE();
754      return 0;
755  }
756}
757
758
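// Trampolines used by Printf when generating native code. There is one
// variant per combination of core ("R") and double ("D") arguments; Printf
// (below) picks the matching trampoline from the types of its arguments so
// that the format string and the four values are forwarded to printf with the
// correct hard-float calling convention.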
759HARDFLOAT void PrintfTrampolineRRRR(
760    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
761  printf(format, a, b, c, d);
762}
763
764
765HARDFLOAT void PrintfTrampolineRRRD(
766    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
767  printf(format, a, b, c, d);
768}
769
770
771HARDFLOAT void PrintfTrampolineRRDR(
772    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
773  printf(format, a, b, c, d);
774}
775
776
777HARDFLOAT void PrintfTrampolineRRDD(
778    const char* format, uint32_t a, uint32_t b, double c, double d) {
779  printf(format, a, b, c, d);
780}
781
782
783HARDFLOAT void PrintfTrampolineRDRR(
784    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
785  printf(format, a, b, c, d);
786}
787
788
789HARDFLOAT void PrintfTrampolineRDRD(
790    const char* format, uint32_t a, double b, uint32_t c, double d) {
791  printf(format, a, b, c, d);
792}
793
794
795HARDFLOAT void PrintfTrampolineRDDR(
796    const char* format, uint32_t a, double b, double c, uint32_t d) {
797  printf(format, a, b, c, d);
798}
799
800
801HARDFLOAT void PrintfTrampolineRDDD(
802    const char* format, uint32_t a, double b, double c, double d) {
803  printf(format, a, b, c, d);
804}
805
806
807HARDFLOAT void PrintfTrampolineDRRR(
808    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
809  printf(format, a, b, c, d);
810}
811
812
813HARDFLOAT void PrintfTrampolineDRRD(
814    const char* format, double a, uint32_t b, uint32_t c, double d) {
815  printf(format, a, b, c, d);
816}
817
818
819HARDFLOAT void PrintfTrampolineDRDR(
820    const char* format, double a, uint32_t b, double c, uint32_t d) {
821  printf(format, a, b, c, d);
822}
823
824
825HARDFLOAT void PrintfTrampolineDRDD(
826    const char* format, double a, uint32_t b, double c, double d) {
827  printf(format, a, b, c, d);
828}
829
830
831HARDFLOAT void PrintfTrampolineDDRR(
832    const char* format, double a, double b, uint32_t c, uint32_t d) {
833  printf(format, a, b, c, d);
834}
835
836
837HARDFLOAT void PrintfTrampolineDDRD(
838    const char* format, double a, double b, uint32_t c, double d) {
839  printf(format, a, b, c, d);
840}
841
842
843HARDFLOAT void PrintfTrampolineDDDR(
844    const char* format, double a, double b, double c, uint32_t d) {
845  printf(format, a, b, c, d);
846}
847
848
849HARDFLOAT void PrintfTrampolineDDDD(
850    const char* format, double a, double b, double c, double d) {
851  printf(format, a, b, c, d);
852}
853
854
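// Print the given registers using the provided printf-style format string.
// Under the simulator this traps via Hvc(kPrintfCode); natively it saves the
// caller-saved state, marshals the values into r1-r3/d0-d3 and calls one of
// the trampolines above. Illustrative use (assuming the trailing register
// parameters default to NoReg, as declared in the header):
//
//   masm.Printf("x = %d, d = %g\n", r0, d0);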
855void MacroAssembler::Printf(const char* format,
856                            CPURegister reg1,
857                            CPURegister reg2,
858                            CPURegister reg3,
859                            CPURegister reg4) {
860  // Exclude all registers from the available scratch registers, so
861  // that we are able to use ip below.
862  // TODO: Refactor this function to use UseScratchRegisterScope
863  // for temporary registers below.
864  UseScratchRegisterScope scratch(this);
865  scratch.ExcludeAll();
866  if (generate_simulator_code_) {
867    PushRegister(reg4);
868    PushRegister(reg3);
869    PushRegister(reg2);
870    PushRegister(reg1);
871    Push(RegisterList(r0, r1));
872    StringLiteral* format_literal =
873        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
874    Adr(r0, format_literal);
875    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
876                    (reg2.GetType() << 4) | reg1.GetType();
877    Mov(r1, args);
878    Hvc(kPrintfCode);
879    Pop(RegisterList(r0, r1));
880    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
881               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
882    Drop(size);
883  } else {
884    // Generate on a native platform => 32 bit environment.
885    // Preserve core registers r0-r3, r12, r14
886    const uint32_t saved_registers_mask =
887        kCallerSavedRegistersMask | (1 << r5.GetCode());
888    Push(RegisterList(saved_registers_mask));
889    // Push VFP registers.
890    Vpush(Untyped64, DRegisterList(d0, 8));
891    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
892    // Find one register which has been saved and which doesn't need to be
893    // printed.
894    RegisterList available_registers(kCallerSavedRegistersMask);
895    if (reg1.GetType() == CPURegister::kRRegister) {
896      available_registers.Remove(Register(reg1.GetCode()));
897    }
898    if (reg2.GetType() == CPURegister::kRRegister) {
899      available_registers.Remove(Register(reg2.GetCode()));
900    }
901    if (reg3.GetType() == CPURegister::kRRegister) {
902      available_registers.Remove(Register(reg3.GetCode()));
903    }
904    if (reg4.GetType() == CPURegister::kRRegister) {
905      available_registers.Remove(Register(reg4.GetCode()));
906    }
907    Register tmp = available_registers.GetFirstAvailableRegister();
908    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
909    // Push the flags.
910    Mrs(tmp, APSR);
911    Push(tmp);
912    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
913    Push(tmp);
914    // Push the registers to print on the stack.
915    PushRegister(reg4);
916    PushRegister(reg3);
917    PushRegister(reg2);
918    PushRegister(reg1);
919    int core_count = 1;
920    int vfp_count = 0;
921    uint32_t printf_type = 0;
922    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
923    // Reg4 may stay on the stack if all the registers to print are core
924    // registers.
925    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
926    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
927    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
928    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
929    // Ensure that the stack is aligned on 8 bytes.
930    And(r5, sp, 0x7);
931    if (core_count == 5) {
932      // One 32 bit argument (reg4) has been left on the stack =>  align the
933      // stack
934      // before the argument.
935      Pop(r0);
936      Sub(sp, sp, r5);
937      Push(r0);
938    } else {
939      Sub(sp, sp, r5);
940    }
941    // Select the right trampoline depending on the arguments.
942    uintptr_t address;
943    switch (printf_type) {
944      case 0:
945        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
946        break;
947      case 1:
948        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
949        break;
950      case 2:
951        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
952        break;
953      case 3:
954        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
955        break;
956      case 4:
957        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
958        break;
959      case 5:
960        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
961        break;
962      case 6:
963        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
964        break;
965      case 7:
966        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
967        break;
968      case 8:
969        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
970        break;
971      case 9:
972        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
973        break;
974      case 10:
975        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
976        break;
977      case 11:
978        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
979        break;
980      case 12:
981        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
982        break;
983      case 13:
984        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
985        break;
986      case 14:
987        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
988        break;
989      case 15:
990        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
991        break;
992      default:
993        VIXL_UNREACHABLE();
994        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
995        break;
996    }
997    StringLiteral* format_literal =
998        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
999    Adr(r0, format_literal);
1000    Mov(ip, Operand::From(address));
1001    Blx(ip);
1002    // If register reg4 was left on the stack => skip it.
1003    if (core_count == 5) Drop(kRegSizeInBytes);
1004    // Restore the stack as it was before alignment.
1005    Add(sp, sp, r5);
1006    // Restore the flags.
1007    Pop(tmp);
1008    Vmsr(FPSCR, tmp);
1009    Pop(tmp);
1010    Msr(APSR_nzcvqg, tmp);
1011    // Restore the registers.
1012    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
1013    Vpop(Untyped64, DRegisterList(d0, 8));
1014    Pop(RegisterList(saved_registers_mask));
1015  }
1016}
1017
1018
1019void MacroAssembler::PushRegister(CPURegister reg) {
1020  switch (reg.GetType()) {
1021    case CPURegister::kNoRegister:
1022      break;
1023    case CPURegister::kRRegister:
1024      Push(Register(reg.GetCode()));
1025      break;
1026    case CPURegister::kSRegister:
1027      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
1028      break;
1029    case CPURegister::kDRegister:
1030      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
1031      break;
1032    case CPURegister::kQRegister:
1033      VIXL_UNIMPLEMENTED();
1034      break;
1035  }
1036}
1037
1038
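// Move the next argument from the stack into its parameter register: core
// values are popped into r1-r3 (r0 already holds the format string, which is
// why the caller starts *core_count at 1), while S/D values are popped into
// d0-d3, with S values converted to double as printf expects. Each VFP
// argument also sets its bit in *printf_type, which later selects the
// trampoline to call.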
1039void MacroAssembler::PreparePrintfArgument(CPURegister reg,
1040                                           int* core_count,
1041                                           int* vfp_count,
1042                                           uint32_t* printf_type) {
1043  switch (reg.GetType()) {
1044    case CPURegister::kNoRegister:
1045      break;
1046    case CPURegister::kRRegister:
1047      VIXL_ASSERT(*core_count <= 4);
1048      if (*core_count < 4) Pop(Register(*core_count));
1049      *core_count += 1;
1050      break;
1051    case CPURegister::kSRegister:
1052      VIXL_ASSERT(*vfp_count < 4);
1053      *printf_type |= 1 << (*core_count + *vfp_count - 1);
1054      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
1055      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
1056      *vfp_count += 1;
1057      break;
1058    case CPURegister::kDRegister:
1059      VIXL_ASSERT(*vfp_count < 4);
1060      *printf_type |= 1 << (*core_count + *vfp_count - 1);
1061      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
1062      *vfp_count += 1;
1063      break;
1064    case CPURegister::kQRegister:
1065      VIXL_UNIMPLEMENTED();
1066      break;
1067  }
1068}
1069
1070
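// The Delegate() overloads below are called back by the Assembler when an
// instruction cannot be encoded as requested (for example an out-of-range
// immediate, or an operand shape the current ISA does not support). They
// synthesise an equivalent sequence, typically through a scratch register,
// and otherwise fall back to Assembler::Delegate().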
1071void MacroAssembler::Delegate(InstructionType type,
1072                              InstructionCondROp instruction,
1073                              Condition cond,
1074                              Register rn,
1075                              const Operand& operand) {
1076  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
1077              (type == kUxtb16));
1078
1079  if (type == kMovt) {
1080    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.");
1081  }
1082
1083  // This delegate only supports teq with immediates.
1084  CONTEXT_SCOPE;
1085  if ((type == kTeq) && operand.IsImmediate()) {
1086    UseScratchRegisterScope temps(this);
1087    Register scratch = temps.Acquire();
1088    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
1089    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1090    teq(cond, rn, scratch);
1091    return;
1092  }
1093  Assembler::Delegate(type, instruction, cond, rn, operand);
1094}
1095
1096
1097void MacroAssembler::Delegate(InstructionType type,
1098                              InstructionCondSizeROp instruction,
1099                              Condition cond,
1100                              EncodingSize size,
1101                              Register rn,
1102                              const Operand& operand) {
1103  CONTEXT_SCOPE;
1104  VIXL_ASSERT(size.IsBest());
1105  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
1106              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
1107              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
1108              (type == kUxtb) || (type == kUxth));
1109  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1110    VIXL_ASSERT((type != kMov) && (type != kMovs));
1111    InstructionCondRROp shiftop = NULL;
1112    switch (operand.GetShift().GetType()) {
1113      case LSL:
1114        shiftop = &Assembler::lsl;
1115        break;
1116      case LSR:
1117        shiftop = &Assembler::lsr;
1118        break;
1119      case ASR:
1120        shiftop = &Assembler::asr;
1121        break;
1122      case RRX:
1123        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1124        VIXL_UNREACHABLE();
1125        break;
1126      case ROR:
1127        shiftop = &Assembler::ror;
1128        break;
1129      default:
1130        VIXL_UNREACHABLE();
1131    }
1132    if (shiftop != NULL) {
1133      UseScratchRegisterScope temps(this);
1134      Register scratch = temps.Acquire();
1135      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1136      (this->*shiftop)(cond,
1137                       scratch,
1138                       operand.GetBaseRegister(),
1139                       operand.GetShiftRegister());
1140      (this->*instruction)(cond, size, rn, scratch);
1141      return;
1142    }
1143  }
1144  if (operand.IsImmediate()) {
1145    uint32_t imm = operand.GetImmediate();
1146    switch (type) {
1147      case kMov:
1148      case kMovs:
1149        if (!rn.IsPC()) {
1150          // Immediate is too large, but not using PC, so handle with mov{t}.
1151          HandleOutOfBoundsImmediate(cond, rn, imm);
1152          if (type == kMovs) {
1153            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1154            tst(cond, rn, rn);
1155          }
1156          return;
1157        } else if (type == kMov) {
1158          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
1159          // Immediate is too large and using PC, so handle using a temporary
1160          // register.
1161          UseScratchRegisterScope temps(this);
1162          Register scratch = temps.Acquire();
1163          HandleOutOfBoundsImmediate(al, scratch, imm);
1164          EnsureEmitFor(kMaxInstructionSizeInBytes);
1165          bx(cond, scratch);
1166          return;
1167        }
1168        break;
1169      case kCmn:
1170      case kCmp:
1171        if (IsUsingA32() || !rn.IsPC()) {
1172          UseScratchRegisterScope temps(this);
1173          Register scratch = temps.Acquire();
1174          HandleOutOfBoundsImmediate(cond, scratch, imm);
1175          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1176          (this->*instruction)(cond, size, rn, scratch);
1177          return;
1178        }
1179        break;
1180      case kMvn:
1181      case kMvns:
1182        if (!rn.IsPC()) {
1183          UseScratchRegisterScope temps(this);
1184          Register scratch = temps.Acquire();
1185          HandleOutOfBoundsImmediate(cond, scratch, imm);
1186          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1187          (this->*instruction)(cond, size, rn, scratch);
1188          return;
1189        }
1190        break;
1191      case kTst:
1192        if (IsUsingA32() || !rn.IsPC()) {
1193          UseScratchRegisterScope temps(this);
1194          Register scratch = temps.Acquire();
1195          HandleOutOfBoundsImmediate(cond, scratch, imm);
1196          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1197          (this->*instruction)(cond, size, rn, scratch);
1198          return;
1199        }
1200        break;
1201      default:  // kSxtb, Sxth, Uxtb, Uxth
1202        break;
1203    }
1204  }
1205  Assembler::Delegate(type, instruction, cond, size, rn, operand);
1206}
1207
1208
1209void MacroAssembler::Delegate(InstructionType type,
1210                              InstructionCondRROp instruction,
1211                              Condition cond,
1212                              Register rd,
1213                              Register rn,
1214                              const Operand& operand) {
1215  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
1216      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
1217      (type == kPkhbt) || (type == kPkhtb)) {
1218    UnimplementedDelegate(type);
1219    return;
1220  }
1221
1222  // This delegate only handles the following instructions.
1223  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
1224              (type == kRscs));
1225  CONTEXT_SCOPE;
1226
1227  // T32 does not support register shifted register operands, emulate it.
1228  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1229    InstructionCondRROp shiftop = NULL;
1230    switch (operand.GetShift().GetType()) {
1231      case LSL:
1232        shiftop = &Assembler::lsl;
1233        break;
1234      case LSR:
1235        shiftop = &Assembler::lsr;
1236        break;
1237      case ASR:
1238        shiftop = &Assembler::asr;
1239        break;
1240      case RRX:
1241        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1242        VIXL_UNREACHABLE();
1243        break;
1244      case ROR:
1245        shiftop = &Assembler::ror;
1246        break;
1247      default:
1248        VIXL_UNREACHABLE();
1249    }
1250    if (shiftop != NULL) {
1251      UseScratchRegisterScope temps(this);
1252      Register rm = operand.GetBaseRegister();
1253      Register rs = operand.GetShiftRegister();
1254      // Try to use rd as a scratch register. We can do this if it aliases rs or
1255      // rm (because we read them in the first instruction), but not rn.
1256      if (!rd.Is(rn)) temps.Include(rd);
1257      Register scratch = temps.Acquire();
1258      // TODO: The scope length was measured empirically. We should analyse the
1259      // worst-case size and add targetted tests.
1260      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1261      (this->*shiftop)(cond, scratch, rm, rs);
1262      (this->*instruction)(cond, rd, rn, scratch);
1263      return;
1264    }
1265  }
1266
1267  // T32 does not have a Rsc instruction, so invert the lhs input (bitwise
1268  // NOT) and turn it into an Adc. Adc and Rsc are related by a bitwise NOT:
1269  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
1270  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
1271    // The RegisterShiftRegister case should have been handled above.
1272    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
1273    UseScratchRegisterScope temps(this);
1274    // Try to use rd as a scratch register. We can do this if it aliases rn
1275    // (because we read it in the first instruction), but not rm.
1276    temps.Include(rd);
1277    temps.Exclude(operand);
1278    Register negated_rn = temps.Acquire();
1279    {
1280      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1281      mvn(cond, negated_rn, rn);
1282    }
1283    if (type == kRsc) {
1284      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1285      adc(cond, rd, negated_rn, operand);
1286      return;
1287    }
1288    // TODO: We shouldn't have to specify how much space the next instruction
1289    // needs.
1290    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1291    adcs(cond, rd, negated_rn, operand);
1292    return;
1293  }
1294
1295  // A32 does not have an Orn instruction, so invert the rhs input (bitwise
1296  // NOT) and turn it into an Orr.
1297  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
1298    // TODO: orn r0, r1, imm -> orr r0, r1, ~imm if doable
1299    //  mvn r0, r2
1300    //  orr r0, r1, r0
1301    Register scratch;
1302    UseScratchRegisterScope temps(this);
1303    // Try to use rd as a scratch register. We can do this even if rd is used
1304    // by the operand (it is fully read by the mvn first), but not if it is rn.
1305    if (!rd.Is(rn)) temps.Include(rd);
1306    scratch = temps.Acquire();
1307    {
1308      // TODO: We shouldn't have to specify how much space the next instruction
1309      // needs.
1310      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1311      mvn(cond, scratch, operand);
1312    }
1313    if (type == kOrns) {
1314      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1315      orrs(cond, rd, rn, scratch);
1316      return;
1317    }
1318    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1319    orr(cond, rd, rn, scratch);
1320    return;
1321  }
1322  if (operand.IsImmediate()) {
1323    int32_t imm = operand.GetSignedImmediate();
1324
1325    // If the immediate can be encoded when inverted, turn Orn into Orr.
1326    // Otherwise rely on HandleOutOfBoundsImmediate to materialise the
1327    // immediate into a scratch register.
1328    if (IsUsingT32() && ((type == kOrn) || (type == kOrns)) &&
1329        ImmediateT32::IsImmediateT32(~imm)) {
1330      VIXL_ASSERT((type == kOrn) || (type == kOrns));
1331      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1332      switch (type) {
1333        case kOrn:
1334          orr(cond, rd, rn, ~imm);
1335          return;
1336        case kOrns:
1337          orrs(cond, rd, rn, ~imm);
1338          return;
1339        default:
1340          VIXL_UNREACHABLE();
1341          break;
1342      }
1343    } else {
1344      UseScratchRegisterScope temps(this);
1345      // Allow using the destination as a scratch register if possible.
1346      if (!rd.Is(rn)) temps.Include(rd);
1347      Register scratch = temps.Acquire();
1348      HandleOutOfBoundsImmediate(cond, scratch, imm);
1349      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1350      (this->*instruction)(cond, rd, rn, scratch);
1351      return;
1352    }
1353  }
1354  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1355}
1356
1357
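// Delegate for `Ldr rd, label` and `Adr rd, label`. When the label is already
// bound, an out-of-range `ldr` can be rewritten as an address computation via
// MemOperandComputationHelper followed by a load with a small immediate
// offset; all other cases fall through to Assembler::Delegate().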
1358void MacroAssembler::Delegate(InstructionType type,
1359                              InstructionCondSizeRL instruction,
1360                              Condition cond,
1361                              EncodingSize size,
1362                              Register rd,
1363                              Label* label) {
1364  VIXL_ASSERT((type == kLdr) || (type == kAdr));
1365
1366  CONTEXT_SCOPE;
1367  VIXL_ASSERT(size.IsBest());
1368
1369  if ((type == kLdr) && label->IsBound()) {
1370    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
1371    UseScratchRegisterScope temps(this);
1372    temps.Include(rd);
1373    uint32_t mask = GetOffsetMask(type, Offset);
1374    ldr(rd, MemOperandComputationHelper(cond, temps.Acquire(), label, mask));
1375    return;
1376  }
1377
1378  Assembler::Delegate(type, instruction, cond, size, rd, label);
1379}
1380
1381
1382void MacroAssembler::Delegate(InstructionType type,
1383                              InstructionCondSizeRROp instruction,
1384                              Condition cond,
1385                              EncodingSize size,
1386                              Register rd,
1387                              Register rn,
1388                              const Operand& operand) {
1389  VIXL_ASSERT(
1390      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1391      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1392      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1393      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1394      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1395      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1396      (type == kSub) || (type == kSubs));
1397
1398  CONTEXT_SCOPE;
1399  VIXL_ASSERT(size.IsBest());
1400  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1401    InstructionCondRROp shiftop = NULL;
1402    switch (operand.GetShift().GetType()) {
1403      case LSL:
1404        shiftop = &Assembler::lsl;
1405        break;
1406      case LSR:
1407        shiftop = &Assembler::lsr;
1408        break;
1409      case ASR:
1410        shiftop = &Assembler::asr;
1411        break;
1412      case RRX:
1413        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1414        VIXL_UNREACHABLE();
1415        break;
1416      case ROR:
1417        shiftop = &Assembler::ror;
1418        break;
1419      default:
1420        VIXL_UNREACHABLE();
1421    }
1422    if (shiftop != NULL) {
1423      UseScratchRegisterScope temps(this);
1424      Register rm = operand.GetBaseRegister();
1425      Register rs = operand.GetShiftRegister();
1426      // Try to use rd as a scratch register. We can do this if it aliases rs or
1427      // rm (because we read them in the first instruction), but not rn.
1428      if (!rd.Is(rn)) temps.Include(rd);
1429      Register scratch = temps.Acquire();
1430      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1431      (this->*shiftop)(cond, scratch, rm, rs);
1432      (this->*instruction)(cond, size, rd, rn, scratch);
1433      return;
1434    }
1435  }
1436  if (operand.IsImmediate()) {
1437    int32_t imm = operand.GetSignedImmediate();
1438    if (ImmediateT32::IsImmediateT32(~imm)) {
1439      if (IsUsingT32()) {
1440        switch (type) {
1441          case kOrr:
1442            orn(cond, rd, rn, ~imm);
1443            return;
1444          case kOrrs:
1445            orns(cond, rd, rn, ~imm);
1446            return;
1447          default:
1448            break;
1449        }
1450      }
1451    }
1452    if (imm < 0) {
1453      InstructionCondSizeRROp asmcb = NULL;
1454      // Add and sub are equivalent using an arithmetic negation:
1455      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1456      // Add and sub with carry are equivalent using a bitwise NOT:
1457      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
1458      switch (type) {
1459        case kAdd:
1460          asmcb = &Assembler::sub;
1461          imm = -imm;
1462          break;
1463        case kAdds:
1464          asmcb = &Assembler::subs;
1465          imm = -imm;
1466          break;
1467        case kSub:
1468          asmcb = &Assembler::add;
1469          imm = -imm;
1470          break;
1471        case kSubs:
1472          asmcb = &Assembler::adds;
1473          imm = -imm;
1474          break;
1475        case kAdc:
1476          asmcb = &Assembler::sbc;
1477          imm = ~imm;
1478          break;
1479        case kAdcs:
1480          asmcb = &Assembler::sbcs;
1481          imm = ~imm;
1482          break;
1483        case kSbc:
1484          asmcb = &Assembler::adc;
1485          imm = ~imm;
1486          break;
1487        case kSbcs:
1488          asmcb = &Assembler::adcs;
1489          imm = ~imm;
1490          break;
1491        default:
1492          break;
1493      }
1494      if (asmcb != NULL) {
1495        CodeBufferCheckScope scope(this, 4 * kMaxInstructionSizeInBytes);
1496        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1497        return;
1498      }
1499    }
1500    UseScratchRegisterScope temps(this);
1501    // Allow using the destination as a scratch register if possible.
1502    if (!rd.Is(rn)) temps.Include(rd);
1503
1504    if (rn.IsPC()) {
1505      // If we're reading the PC, we need to do it in the first instruction,
1506      // otherwise we'll read the wrong value. We rely on this to handle the
1507      // long-range PC-relative MemOperands which can result from user-managed
1508      // literals.
1509
1510      // Only handle negative offsets. The correct way to handle positive
1511      // offsets isn't clear; does the user want the offset from the start of
1512      // the macro, or from the end (to allow a certain amount of space)?
1513      bool offset_is_negative_or_zero = (imm <= 0);
1514      switch (type) {
1515        case kAdd:
1516        case kAdds:
1517          offset_is_negative_or_zero = (imm <= 0);
1518          break;
1519        case kSub:
1520        case kSubs:
1521          offset_is_negative_or_zero = (imm >= 0);
1522          break;
1523        case kAdc:
1524        case kAdcs:
1525          offset_is_negative_or_zero = (imm < 0);
1526          break;
1527        case kSbc:
1528        case kSbcs:
1529          offset_is_negative_or_zero = (imm > 0);
1530          break;
1531        default:
1532          break;
1533      }
1534      if (offset_is_negative_or_zero) {
1535        {
1536          rn = temps.Acquire();
1537          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1538          mov(cond, rn, pc);
1539        }
1540        // Recurse rather than falling through, to try to get the immediate into
1541        // a single instruction.
1542        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1543        (this->*instruction)(cond, size, rd, rn, operand);
1544        return;
1545      }
1546    } else {
1547      Register scratch = temps.Acquire();
1548      // TODO: The scope length was measured empirically. We should analyse the
1549      // worst-case size and add targeted tests.
1550      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1551      mov(cond, scratch, operand.GetImmediate());
1552      (this->*instruction)(cond, size, rd, rn, scratch);
1553      return;
1554    }
1555  }
1556  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1557}
1558
1559
1560void MacroAssembler::Delegate(InstructionType type,
1561                              InstructionRL instruction,
1562                              Register rn,
1563                              Label* label) {
1564  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1565
1566  CONTEXT_SCOPE;
1567  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1568  if (IsUsingA32()) {
1569    if (type == kCbz) {
1570      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1571    } else {
1572      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1573    }
1574  } else if (rn.IsLow()) {
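        // cbz/cbnz can only branch forwards over a small range, so synthesise
        // the general case by inverting the test and branching over an
        // unconditional b, which can reach the label:
        //   cbnz rn, label  ->  cbz rn, done; b label; done: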
1575    switch (type) {
1576      case kCbnz: {
1577        Label done;
1578        cbz(rn, &done);
1579        b(label);
1580        Bind(&done);
1581        return;
1582      }
1583      case kCbz: {
1584        Label done;
1585        cbnz(rn, &done);
1586        b(label);
1587        Bind(&done);
1588        return;
1589      }
1590      default:
1591        break;
1592    }
1593  }
1594  Assembler::Delegate(type, instruction, rn, label);
1595}
1596
1597
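    // Return true if every byte of 'imm' is either 0x00 or 0xff, so that the
    // value (replicated to 64 bits) can be encoded as a 'vmov.i64' byte-mask
    // immediate. For example, 0x00ff00ff qualifies but 0xffab0000 does not.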
1598template <typename T>
1599static inline bool IsI64BitPattern(T imm) {
1600  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1601    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1602  }
1603  return true;
1604}
1605
1606
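    // Return true if all bytes of 'imm' are equal, i.e. 'imm' is its lowest
    // byte replicated, so it can be materialised with 'vmov.i8'. For example,
    // 0x37373737 qualifies but 0x37003700 does not.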
1607template <typename T>
1608static inline bool IsI8BitPattern(T imm) {
1609  uint8_t imm8 = imm & 0xff;
1610  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1611    imm >>= 8;
1612    if ((imm & 0xff) != imm8) return false;
1613  }
1614  return true;
1615}
1616
1617
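    // Return true if 'imm32' matches one of the bit patterns enumerated in the
    // comments below, i.e. if it can be materialised with a single 'vmvn.i32'
    // whose immediate is ~imm32. For example, CanBeInverted(0xfffffffe) is true
    // and the value can be loaded with 'vmvn.i32 d0, #0x00000001'.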
1618static inline bool CanBeInverted(uint32_t imm32) {
1619  uint32_t fill8 = 0;
1620
1621  if ((imm32 & 0xffffff00) == 0xffffff00) {
1622    //    11111111 11111111 11111111 abcdefgh
1623    return true;
1624  }
1625  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1626    fill8 = imm32 & 0xff;
1627    imm32 >>= 8;
1628    if ((imm32 >> 8) == 0xffff) {
1629      //    11111111 11111111 abcdefgh 00000000
1630      // or 11111111 11111111 abcdefgh 11111111
1631      return true;
1632    }
1633    if ((imm32 & 0xff) == fill8) {
1634      imm32 >>= 8;
1635      if ((imm32 >> 8) == 0xff) {
1636        //    11111111 abcdefgh 00000000 00000000
1637        // or 11111111 abcdefgh 11111111 11111111
1638        return true;
1639      }
1640      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1641        //    abcdefgh 11111111 11111111 11111111
1642        return true;
1643      }
1644    }
1645  }
1646  return false;
1647}
1648
1649
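    // Replicate 'imm' across all the T-sized chunks of a RES-sized result.
    // For example, replicate<uint64_t>(uint32_t(0xff0000ff)) returns
    // 0xff0000ffff0000ff.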
1650template <typename RES, typename T>
1651static inline RES replicate(T imm) {
1652  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1653              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1654  RES res = imm;
1655  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1656    res = (res << (sizeof(T) * 8)) | imm;
1657  }
1658  return res;
1659}
1660
1661
1662void MacroAssembler::Delegate(InstructionType type,
1663                              InstructionCondDtSSop instruction,
1664                              Condition cond,
1665                              DataType dt,
1666                              SRegister rd,
1667                              const SOperand& operand) {
1668  CONTEXT_SCOPE;
1669  if (type == kVmov) {
1670    if (operand.IsImmediate() && dt.Is(F32)) {
1671      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1672      if (neon_imm.CanConvert<float>()) {
1673        // movw ip, imm16
1674        // movt ip, imm16
1675        // vmov s0, ip
1676        UseScratchRegisterScope temps(this);
1677        Register scratch = temps.Acquire();
1678        float f = neon_imm.GetImmediate<float>();
1679        // TODO: The scope length was measured empirically. We should analyse
1680        // the worst-case size of the generated sequence and add targeted
1681        // tests.
1682        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1683        mov(cond, scratch, FloatToRawbits(f));
1684        vmov(cond, rd, scratch);
1685        return;
1686      }
1687    }
1688  }
1689  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1690}
1691
1692
1693void MacroAssembler::Delegate(InstructionType type,
1694                              InstructionCondDtDDop instruction,
1695                              Condition cond,
1696                              DataType dt,
1697                              DRegister rd,
1698                              const DOperand& operand) {
1699  CONTEXT_SCOPE;
1700  if (type == kVmov) {
1701    if (operand.IsImmediate()) {
1702      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1703      switch (dt.GetValue()) {
1704        case I32:
1705          if (neon_imm.CanConvert<uint32_t>()) {
1706            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1707            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1708            if (IsI8BitPattern(imm)) {
1709              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1710              vmov(cond, I8, rd, imm & 0xff);
1711              return;
1712            }
1713            // vmov.i32 d0, 0xff0000ff will translate into
1714            // vmov.i64 d0, 0xff0000ffff0000ff
1715            if (IsI64BitPattern(imm)) {
1716              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1717              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1718              return;
1719            }
1720            // vmov.i32 d0, 0xffab0000 will translate into
1721            // vmvn.i32 d0, 0x0054ffff
1722            if (cond.Is(al) && CanBeInverted(imm)) {
1723              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1724              vmvn(I32, rd, ~imm);
1725              return;
1726            }
1727          }
1728          break;
1729        case I16:
1730          if (neon_imm.CanConvert<uint16_t>()) {
1731            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1732            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1733            if (IsI8BitPattern(imm)) {
1734              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1735              vmov(cond, I8, rd, imm & 0xff);
1736              return;
1737            }
1738          }
1739          break;
1740        case I64:
1741          if (neon_imm.CanConvert<uint64_t>()) {
1742            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1743            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1744            if (IsI8BitPattern(imm)) {
1745              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1746              vmov(cond, I8, rd, imm & 0xff);
1747              return;
1748            }
1749            // mov ip, lo(imm64)
1750            // vdup d0, ip
1751            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
1752            // preserved.
1753            {
1754              UseScratchRegisterScope temps(this);
1755              Register scratch = temps.Acquire();
1756              {
1757                // TODO: The scope length was measured empirically. We should
1758                // analyse the worst-case size of the generated sequence and
1759                // add targeted tests.
1760                CodeBufferCheckScope scope(this,
1761                                           2 * kMaxInstructionSizeInBytes);
1762                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1763              }
1764              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1765              vdup(cond, Untyped32, rd, scratch);
1766            }
1767            // mov ip, hi(imm64)
1768            // vmov d0[1], ip
1769            {
1770              UseScratchRegisterScope temps(this);
1771              Register scratch = temps.Acquire();
1772              {
1773                // TODO: The scope length was measured empirically. We should
1774                // analyse the worst-case size of the generated sequence and
1775                // add targeted tests.
1776                CodeBufferCheckScope scope(this,
1777                                           2 * kMaxInstructionSizeInBytes);
1778                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1779              }
1780              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1781              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1782            }
1783            return;
1784          }
1785          break;
1786        default:
1787          break;
1788      }
1789      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1790      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1791        // mov ip, imm32
1792        // vdup.16 (or vdup.32, depending on dt) d0, ip
1793        UseScratchRegisterScope temps(this);
1794        Register scratch = temps.Acquire();
1795        {
1796          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1797          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1798        }
1799        DataTypeValue vdup_dt = Untyped32;
1800        switch (dt.GetValue()) {
1801          case I16:
1802            vdup_dt = Untyped16;
1803            break;
1804          case I32:
1805            vdup_dt = Untyped32;
1806            break;
1807          default:
1808            VIXL_UNREACHABLE();
1809        }
1810        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1811        vdup(cond, vdup_dt, rd, scratch);
1812        return;
1813      }
1814      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1815        float f = neon_imm.GetImmediate<float>();
1816        // Punt to vmov.i32
1817        // TODO: The scope length was guessed based on the double case below. We
1818        // should analyse the worst-case size and add targeted tests.
1819        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1820        vmov(cond, I32, rd, FloatToRawbits(f));
1821        return;
1822      }
1823      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1824        // Punt to vmov.i64
1825        double d = neon_imm.GetImmediate<double>();
1826        // TODO: The scope length was measured empirically. We should analyse
1827        // the worst-case size of the generated sequence and add targeted
1828        // tests.
1829        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1830        vmov(cond, I64, rd, DoubleToRawbits(d));
1831        return;
1832      }
1833    }
1834  }
1835  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1836}
1837
1838
1839void MacroAssembler::Delegate(InstructionType type,
1840                              InstructionCondDtQQop instruction,
1841                              Condition cond,
1842                              DataType dt,
1843                              QRegister rd,
1844                              const QOperand& operand) {
1845  CONTEXT_SCOPE;
1846  if (type == kVmov) {
1847    if (operand.IsImmediate()) {
1848      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1849      switch (dt.GetValue()) {
1850        case I32:
1851          if (neon_imm.CanConvert<uint32_t>()) {
1852            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1853            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1854            if (IsI8BitPattern(imm)) {
1855              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1856              vmov(cond, I8, rd, imm & 0xff);
1857              return;
1858            }
1859            // vmov.i32 d0, 0xff0000ff will translate into
1860            // vmov.i64 d0, 0xff0000ffff0000ff
1861            if (IsI64BitPattern(imm)) {
1862              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1863              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1864              return;
1865            }
1866            // vmov.i32 d0, 0xffab0000 will translate into
1867            // vmvn.i32 d0, 0x0054ffff
1868            if (CanBeInverted(imm)) {
1869              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1870              vmvn(cond, I32, rd, ~imm);
1871              return;
1872            }
1873          }
1874          break;
1875        case I16:
1876          if (neon_imm.CanConvert<uint16_t>()) {
1877            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1878            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1879            if (IsI8BitPattern(imm)) {
1880              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1881              vmov(cond, I8, rd, imm & 0xff);
1882              return;
1883            }
1884          }
1885          break;
1886        case I64:
1887          if (neon_imm.CanConvert<uint64_t>()) {
1888            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1889            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1890            if (IsI8BitPattern(imm)) {
1891              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1892              vmov(cond, I8, rd, imm & 0xff);
1893              return;
1894            }
1895            // mov ip, lo(imm64)
1896            // vdup q0, ip
1897            // vdup is preferred to 'vmov d0[0]' as the other lanes do not
1898            // need to be preserved.
1899            {
1900              UseScratchRegisterScope temps(this);
1901              Register scratch = temps.Acquire();
1902              {
1903                CodeBufferCheckScope scope(this,
1904                                           2 * kMaxInstructionSizeInBytes);
1905                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1906              }
1907              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1908              vdup(cond, Untyped32, rd, scratch);
1909            }
1910            // mov ip, hi(imm64)
1911            // vmov.32 d0[1], ip
1912            // vmov d1, d0
1913            {
1914              UseScratchRegisterScope temps(this);
1915              Register scratch = temps.Acquire();
1916              {
1917                CodeBufferCheckScope scope(this,
1918                                           2 * kMaxInstructionSizeInBytes);
1919                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1920              }
1921              {
1922                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1923                vmov(cond,
1924                     Untyped32,
1925                     DRegisterLane(rd.GetLowDRegister(), 1),
1926                     scratch);
1927              }
1928              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1929              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1930            }
1931            return;
1932          }
1933          break;
1934        default:
1935          break;
1936      }
1937      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1938      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1939        // mov ip, imm32
1940        // vdup.16 (or vdup.32, depending on dt) q0, ip
1941        UseScratchRegisterScope temps(this);
1942        Register scratch = temps.Acquire();
1943        {
1944          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1945          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1946        }
1947        DataTypeValue vdup_dt = Untyped32;
1948        switch (dt.GetValue()) {
1949          case I16:
1950            vdup_dt = Untyped16;
1951            break;
1952          case I32:
1953            vdup_dt = Untyped32;
1954            break;
1955          default:
1956            VIXL_UNREACHABLE();
1957        }
1958        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1959        vdup(cond, vdup_dt, rd, scratch);
1960        return;
1961      }
1962      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1963        // Punt to vmov.i32
1964        float f = neon_imm.GetImmediate<float>();
1965        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1966        vmov(cond, I32, rd, FloatToRawbits(f));
1967        return;
1968      }
1969      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1970        // Use vmov to create the double in the low D register, then duplicate
1971        // it into the high D register.
1972        double d = neon_imm.GetImmediate<double>();
1973        CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
1974        vmov(cond, F64, rd.GetLowDRegister(), d);
1975        vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
1976        return;
1977      }
1978    }
1979  }
1980  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1981}
1982
1983
1984void MacroAssembler::Delegate(InstructionType type,
1985                              InstructionCondRL instruction,
1986                              Condition cond,
1987                              Register rt,
1988                              Label* label) {
1989  VIXL_ASSERT((type == kLdrb) || (type == kLdrh) || (type == kLdrsb) ||
1990              (type == kLdrsh));
1991
1992  CONTEXT_SCOPE;
1993
1994  if (label->IsBound()) {
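        // The label is bound, so its address is known: form a PC-relative base
        // address in a scratch register and fold what remains of the offset into
        // the load itself (see MemOperandComputationHelper).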
1995    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
1996    UseScratchRegisterScope temps(this);
1997    temps.Include(rt);
1998    Register scratch = temps.Acquire();
1999    uint32_t mask = GetOffsetMask(type, Offset);
2000    switch (type) {
2001      case kLdrb:
2002        ldrb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2003        return;
2004      case kLdrh:
2005        ldrh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2006        return;
2007      case kLdrsb:
2008        ldrsb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2009        return;
2010      case kLdrsh:
2011        ldrsh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2012        return;
2013      default:
2014        VIXL_UNREACHABLE();
2015    }
2016    return;
2017  }
2018
2019  Assembler::Delegate(type, instruction, cond, rt, label);
2020}
2021
2022
2023void MacroAssembler::Delegate(InstructionType type,
2024                              InstructionCondRRL instruction,
2025                              Condition cond,
2026                              Register rt,
2027                              Register rt2,
2028                              Label* label) {
2029  VIXL_ASSERT(type == kLdrd);
2030
2031  CONTEXT_SCOPE;
2032
2033  if (label->IsBound()) {
2034    CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
2035    UseScratchRegisterScope temps(this);
2036    temps.Include(rt, rt2);
2037    Register scratch = temps.Acquire();
2038    uint32_t mask = GetOffsetMask(type, Offset);
2039    ldrd(rt, rt2, MemOperandComputationHelper(cond, scratch, label, mask));
2040    return;
2041  }
2042
2043  Assembler::Delegate(type, instruction, cond, rt, rt2, label);
2044}
2045
2046
2047void MacroAssembler::Delegate(InstructionType type,
2048                              InstructionCondSizeRMop instruction,
2049                              Condition cond,
2050                              EncodingSize size,
2051                              Register rd,
2052                              const MemOperand& operand) {
2053  CONTEXT_SCOPE;
2054  VIXL_ASSERT(size.IsBest());
2055  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
2056              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
2057              (type == kStrb) || (type == kStrh));
2058  if (operand.IsImmediate()) {
2059    const Register& rn = operand.GetBaseRegister();
2060    AddrMode addrmode = operand.GetAddrMode();
2061    int32_t offset = operand.GetOffsetImmediate();
2062    uint32_t mask = GetOffsetMask(type, addrmode);
2063    bool negative;
2064    // Try to maximize the part of the offset which the MemOperand can encode
2065    // directly (load_store_offset). Add or subtract the remainder
2066    // (add_sub_offset) separately.
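        // For example, for a word access with mask == 0xfff and offset == -0x1234:
        //   load_store_offset = -0x1234 & 0xfff            == 0xdcc
        //   add_sub_offset    = (0x1234 & ~0xfff) + 0x1000 == 0x2000
        // so 0x2000 is subtracted from the base and the access then uses an
        // immediate offset of 0xdcc (-0x2000 + 0xdcc == -0x1234).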
2067    int32_t add_sub_offset;
2068    int32_t load_store_offset;
2069    load_store_offset = offset & mask;
2070    if (offset >= 0) {
2071      negative = false;
2072      add_sub_offset = offset & ~mask;
2073    } else {
2074      negative = true;
2075      add_sub_offset = -offset & ~mask;
2076      if (load_store_offset > 0) add_sub_offset += mask + 1;
2077    }
2078    switch (addrmode) {
2079      case PreIndex:
2080        // Avoid the unpredictable case 'str r0, [r0, imm]!'
2081        if (!rn.Is(rd)) {
2082          // Pre-Indexed case:
2083          // ldr r0, [r1, 12345]! will translate into
2084          //   add r1, r1, 12345
2085          //   ldr r0, [r1]
2086          {
2087            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2088            if (negative) {
2089              sub(cond, rn, rn, add_sub_offset);
2090            } else {
2091              add(cond, rn, rn, add_sub_offset);
2092            }
2093          }
2094          {
2095            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2096            (this->*instruction)(cond,
2097                                 size,
2098                                 rd,
2099                                 MemOperand(rn, load_store_offset, PreIndex));
2100          }
2101          return;
2102        }
2103        break;
2104      case Offset: {
2105        UseScratchRegisterScope temps(this);
2106        // Allow using the destination as a scratch register if possible.
2107        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2108            !rd.Is(rn)) {
2109          temps.Include(rd);
2110        }
2111        Register scratch = temps.Acquire();
2112        // Offset case:
2113        // ldr r0, [r1, 12345] will translate into
2114        //   add r0, r1, 12345
2115        //   ldr r0, [r0]
2116        {
2117          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2118          if (negative) {
2119            sub(cond, scratch, rn, add_sub_offset);
2120          } else {
2121            add(cond, scratch, rn, add_sub_offset);
2122          }
2123        }
2124        {
2125          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2126          (this->*instruction)(cond,
2127                               size,
2128                               rd,
2129                               MemOperand(scratch, load_store_offset));
2130        }
2131        return;
2132      }
2133      case PostIndex:
2134        // Avoid the unpredictable case 'ldr r0, [r0], imm'
2135        if (!rn.Is(rd)) {
2136          // Post-indexed case:
2137          // ldr r0, [r1], imm32 will translate into
2138          //   ldr r0, [r1]
2139          //   movw ip, imm32 & 0xffff
2140          //   movt ip, imm32 >> 16
2141          //   add r1, r1, ip
2142          {
2143            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2144            (this->*instruction)(cond,
2145                                 size,
2146                                 rd,
2147                                 MemOperand(rn, load_store_offset, PostIndex));
2148          }
2149          {
2150            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2151            if (negative) {
2152              sub(cond, rn, rn, add_sub_offset);
2153            } else {
2154              add(cond, rn, rn, add_sub_offset);
2155            }
2156          }
2157          return;
2158        }
2159        break;
2160    }
2161  } else if (operand.IsPlainRegister()) {
2162    const Register& rn = operand.GetBaseRegister();
2163    AddrMode addrmode = operand.GetAddrMode();
2164    const Register& rm = operand.GetOffsetRegister();
2165    if (rm.IsPC()) {
2166      VIXL_ABORT_WITH_MSG(
2167          "The MacroAssembler does not convert loads and stores with a PC "
2168          "offset register.\n");
2169    }
2170    if (rn.IsPC()) {
2171      if (addrmode == Offset) {
2172        if (IsUsingT32()) {
2173          VIXL_ABORT_WITH_MSG(
2174              "The MacroAssembler does not convert loads and stores with a PC "
2175              "base register for T32.\n");
2176        }
2177      } else {
2178        VIXL_ABORT_WITH_MSG(
2179            "The MacroAssembler does not convert loads and stores with a PC "
2180            "base register in pre-index or post-index mode.\n");
2181      }
2182    }
2183    switch (addrmode) {
2184      case PreIndex:
2185        // Avoid the unpredictable case 'str r0, [r0, imm]!'
2186        if (!rn.Is(rd)) {
2187          // Pre-Indexed case:
2188          // ldr r0, [r1, r2]! will translate into
2189          //   add r1, r1, r2
2190          //   ldr r0, [r1]
2191          {
2192            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2193            if (operand.GetSign().IsPlus()) {
2194              add(cond, rn, rn, rm);
2195            } else {
2196              sub(cond, rn, rn, rm);
2197            }
2198          }
2199          {
2200            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2201            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2202          }
2203          return;
2204        }
2205        break;
2206      case Offset: {
2207        UseScratchRegisterScope temps(this);
2208        // Allow using the destination as a scratch register if this is not a
2209        // store.
2210        // Avoid using PC as a temporary as this has side-effects.
2211        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2212            !rd.IsPC()) {
2213          temps.Include(rd);
2214        }
2215        Register scratch = temps.Acquire();
2216        // Offset case:
2217        // ldr r0, [r1, r2] will translate into
2218        //   add r0, r1, r2
2219        //   ldr r0, [r0]
2220        {
2221          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2222          if (operand.GetSign().IsPlus()) {
2223            add(cond, scratch, rn, rm);
2224          } else {
2225            sub(cond, scratch, rn, rm);
2226          }
2227        }
2228        {
2229          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2230          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
2231        }
2232        return;
2233      }
2234      case PostIndex:
2235        // Avoid the unpredictable case 'ldr r0, [r0], imm'
2236        if (!rn.Is(rd)) {
2237          // Post-indexed case:
2238          // ldr r0, [r1], r2 will translate into
2239          //   ldr r0, [r1]
2240          //   add r1, r1, r2
2241          {
2242            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2243            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2244          }
2245          {
2246            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2247            if (operand.GetSign().IsPlus()) {
2248              add(cond, rn, rn, rm);
2249            } else {
2250              sub(cond, rn, rn, rm);
2251            }
2252          }
2253          return;
2254        }
2255        break;
2256    }
2257  }
2258  Assembler::Delegate(type, instruction, cond, size, rd, operand);
2259}
2260
2261
2262void MacroAssembler::Delegate(InstructionType type,
2263                              InstructionCondRRMop instruction,
2264                              Condition cond,
2265                              Register rt,
2266                              Register rt2,
2267                              const MemOperand& operand) {
2268  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
2269      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
2270      (type == kStrexb) || (type == kStrexh)) {
2271    UnimplementedDelegate(type);
2272    return;
2273  }
2274
2275  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
2276
2277  CONTEXT_SCOPE;
2278
2279  // TODO: Should we allow these cases?
2280  if (IsUsingA32()) {
2281    // The first register needs to be even.
2282    if ((rt.GetCode() & 1) != 0) {
2283      UnimplementedDelegate(type);
2284      return;
2285    }
2286    // Registers need to be adjacent.
2287    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
2288      UnimplementedDelegate(type);
2289      return;
2290    }
2291    // LDRD lr, pc, [...] is not allowed.
2292    if (rt.Is(lr)) {
2293      UnimplementedDelegate(type);
2294      return;
2295    }
2296  }
2297
2298  if (operand.IsImmediate()) {
2299    const Register& rn = operand.GetBaseRegister();
2300    AddrMode addrmode = operand.GetAddrMode();
2301    int32_t offset = operand.GetOffsetImmediate();
2302    switch (addrmode) {
2303      case PreIndex: {
2304        // Allow using the destinations as scratch registers if possible.
2305        UseScratchRegisterScope temps(this);
2306        if (type == kLdrd) {
2307          if (!rt.Is(rn)) temps.Include(rt);
2308          if (!rt2.Is(rn)) temps.Include(rt2);
2309        }
2310
2311        // Pre-Indexed case:
2312        // ldrd r0, r1, [r2, 12345]! will translate into
2313        //   add r2, 12345
2314        //   ldrd r0, r1, [r2]
2315        {
2316          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2317          add(cond, rn, rn, offset);
2318        }
2319        {
2320          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2321          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2322        }
2323        return;
2324      }
2325      case Offset: {
2326        UseScratchRegisterScope temps(this);
2327        // Allow using the destinations as scratch registers if possible.
2328        if (type == kLdrd) {
2329          if (!rt.Is(rn)) temps.Include(rt);
2330          if (!rt2.Is(rn)) temps.Include(rt2);
2331        }
2332        Register scratch = temps.Acquire();
2333        // Offset case:
2334        // ldrd r0, r1, [r2, 12345] will translate into
2335        //   add r0, r2, 12345
2336        //   ldrd r0, r1, [r0]
2337        {
2338          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2339          add(cond, scratch, rn, offset);
2340        }
2341        {
2342          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2343          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2344        }
2345        return;
2346      }
2347      case PostIndex:
2348        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2349        if (!rn.Is(rt) && !rn.Is(rt2)) {
2350          // Post-indexed case:
2351          // ldrd r0, r1, [r2], imm32 will translate into
2352          //   ldrd r0, r1, [r2]
2353          //   movw ip, imm32 & 0xffff
2354          //   movt ip, imm32 >> 16
2355          //   add r2, ip
2356          {
2357            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2358            (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2359          }
2360          {
2361            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2362            add(cond, rn, rn, offset);
2363          }
2364          return;
2365        }
2366        break;
2367    }
2368  }
2369  if (operand.IsPlainRegister()) {
2370    const Register& rn = operand.GetBaseRegister();
2371    const Register& rm = operand.GetOffsetRegister();
2372    AddrMode addrmode = operand.GetAddrMode();
2373    switch (addrmode) {
2374      case PreIndex:
2375        // ldrd r0, r1, [r2, r3]! will translate into
2376        //   add r2, r3
2377        //   ldrd r0, r1, [r2]
2378        {
2379          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2380          if (operand.GetSign().IsPlus()) {
2381            add(cond, rn, rn, rm);
2382          } else {
2383            sub(cond, rn, rn, rm);
2384          }
2385        }
2386        {
2387          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2388          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2389        }
2390        return;
2391      case PostIndex:
2392        // ldrd r0, r1, [r2], r3 will translate into
2393        //   ldrd r0, r1, [r2]
2394        //   add r2, r3
2395        {
2396          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2397          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2398        }
2399        {
2400          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2401          if (operand.GetSign().IsPlus()) {
2402            add(cond, rn, rn, rm);
2403          } else {
2404            sub(cond, rn, rn, rm);
2405          }
2406        }
2407        return;
2408      case Offset: {
2409        UseScratchRegisterScope temps(this);
2410        // Allow using the destinations as scratch registers if possible.
2411        if (type == kLdrd) {
2412          if (!rt.Is(rn)) temps.Include(rt);
2413          if (!rt2.Is(rn)) temps.Include(rt2);
2414        }
2415        Register scratch = temps.Acquire();
2416        // Offset case:
2417        // ldrd r0, r1, [r2, r3] will translate into
2418        //   add r0, r2, r3
2419        //   ldrd r0, r1, [r0]
2420        {
2421          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2422          if (operand.GetSign().IsPlus()) {
2423            add(cond, scratch, rn, rm);
2424          } else {
2425            sub(cond, scratch, rn, rm);
2426          }
2427        }
2428        {
2429          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2430          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2431        }
2432        return;
2433      }
2434    }
2435  }
2436  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2437}
2438
2439
2440void MacroAssembler::Delegate(InstructionType type,
2441                              InstructionCondDtSMop instruction,
2442                              Condition cond,
2443                              DataType dt,
2444                              SRegister rd,
2445                              const MemOperand& operand) {
2446  CONTEXT_SCOPE;
2447  if (operand.IsImmediate()) {
2448    const Register& rn = operand.GetBaseRegister();
2449    AddrMode addrmode = operand.GetAddrMode();
2450    int32_t offset = operand.GetOffsetImmediate();
2451    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2452                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2453    if (rn.IsPC()) {
2454      VIXL_ABORT_WITH_MSG(
2455          "The MacroAssembler does not convert vldr or vstr with a PC base "
2456          "register.\n");
2457    }
2458    switch (addrmode) {
2459      case PreIndex:
2460        // Pre-Indexed case:
2461        // vldr.32 s0, [r1, 12345]! will translate into
2462        //   add r1, 12345
2463        //   vldr.32 s0, [r1]
2464        if (offset != 0) {
2465          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2466          add(cond, rn, rn, offset);
2467        }
2468        {
2469          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2470          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2471        }
2472        return;
2473      case Offset: {
2474        UseScratchRegisterScope temps(this);
2475        Register scratch = temps.Acquire();
2476        // Offset case:
2477        // vldr.32 s0, [r1, 12345] will translate into
2478        //   add ip, r1, 12345
2479        //   vldr.32 s0, [ip]
2480        {
2481          VIXL_ASSERT(offset != 0);
2482          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2483          add(cond, scratch, rn, offset);
2484        }
2485        {
2486          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2487          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2488        }
2489        return;
2490      }
2491      case PostIndex:
2492        // Post-indexed case:
2493        // vldr.32 s0, [r1], imm32 will translate into
2494        //   vldr.32 s0, [r1]
2495          //   movw ip, imm32 & 0xffff
2496        //   movt ip, imm32 >> 16
2497        //   add r1, ip
2498        {
2499          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2500          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2501        }
2502        if (offset != 0) {
2503          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2504          add(cond, rn, rn, offset);
2505        }
2506        return;
2507    }
2508  }
2509  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2510}
2511
2512
2513void MacroAssembler::Delegate(InstructionType type,
2514                              InstructionCondDtDMop instruction,
2515                              Condition cond,
2516                              DataType dt,
2517                              DRegister rd,
2518                              const MemOperand& operand) {
2519  CONTEXT_SCOPE;
2520  if (operand.IsImmediate()) {
2521    const Register& rn = operand.GetBaseRegister();
2522    AddrMode addrmode = operand.GetAddrMode();
2523    int32_t offset = operand.GetOffsetImmediate();
2524    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2525                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2526    if (rn.IsPC()) {
2527      VIXL_ABORT_WITH_MSG(
2528          "The MacroAssembler does not convert vldr or vstr with a PC base "
2529          "register.\n");
2530    }
2531    switch (addrmode) {
2532      case PreIndex:
2533        // Pre-Indexed case:
2534        // vldr.64 d0, [r1, 12345]! will translate into
2535        //   add r1, 12345
2536        //   vldr.64 d0, [r1]
2537        if (offset != 0) {
2538          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2539          add(cond, rn, rn, offset);
2540        }
2541        {
2542          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2543          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2544        }
2545        return;
2546      case Offset: {
2547        UseScratchRegisterScope temps(this);
2548        Register scratch = temps.Acquire();
2549        // Offset case:
2550        // vldr.64 d0, [r1, 12345] will translate into
2551        //   add ip, r1, 12345
2552        //   vldr.64 d0, [ip]
2553        {
2554          VIXL_ASSERT(offset != 0);
2555          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2556          add(cond, scratch, rn, offset);
2557        }
2558        {
2559          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2560          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2561        }
2562        return;
2563      }
2564      case PostIndex:
2565        // Post-indexed case:
2566        // vldr.64 d0, [r1], imm32 will translate into
2567        //   vldr.64 d0, [r1]
2568        //   movw ip, imm32 & 0xffff
2569        //   movt ip, imm32 >> 16
2570        //   add r1, ip
2571        {
2572          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2573          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2574        }
2575        if (offset != 0) {
2576          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2577          add(cond, rn, rn, offset);
2578        }
2579        return;
2580    }
2581  }
2582  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2583}
2584
2585
2586void MacroAssembler::Delegate(InstructionType type,
2587                              InstructionCondMsrOp instruction,
2588                              Condition cond,
2589                              MaskedSpecialRegister spec_reg,
2590                              const Operand& operand) {
2591  USE(type);
2592  VIXL_ASSERT(type == kMsr);
2593  if (operand.IsImmediate()) {
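        // The immediate cannot be encoded in an msr instruction here, so
        // materialise it in a scratch register and use the register form.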
2594    UseScratchRegisterScope temps(this);
2595    Register scratch = temps.Acquire();
2596    {
2597      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2598      mov(cond, scratch, operand);
2599    }
2600    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2601    msr(cond, spec_reg, scratch);
2602    return;
2603  }
2604  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2605}
2606
2607
2608void MacroAssembler::Delegate(InstructionType type,
2609                              InstructionCondDtDL instruction,
2610                              Condition cond,
2611                              DataType dt,
2612                              DRegister rd,
2613                              Label* label) {
2614  VIXL_ASSERT(type == kVldr);
2615
2616  CONTEXT_SCOPE;
2617
2618  if (label->IsBound()) {
2619    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2620    UseScratchRegisterScope temps(this);
2621    Register scratch = temps.Acquire();
2622    uint32_t mask = GetOffsetMask(type, Offset);
2623    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2624    return;
2625  }
2626
2627  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2628}
2629
2630
2631void MacroAssembler::Delegate(InstructionType type,
2632                              InstructionCondDtSL instruction,
2633                              Condition cond,
2634                              DataType dt,
2635                              SRegister rd,
2636                              Label* label) {
2637  VIXL_ASSERT(type == kVldr);
2638
2639  CONTEXT_SCOPE;
2640
2641  if (label->IsBound()) {
2642    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2643    UseScratchRegisterScope temps(this);
2644    Register scratch = temps.Acquire();
2645    uint32_t mask = GetOffsetMask(type, Offset);
2646    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2647    return;
2648  }
2649
2650  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2651}
2652
2653
2654#undef CONTEXT_SCOPE
2655#undef TOSTRING
2656#undef STRINGIFY
2657
2658// Start of generated code.
2659// End of generated code.
2660}  // namespace aarch32
2661}  // namespace vixl
2662