// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include "aarch32/macro-assembler-aarch32.h"

#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)

#define CONTEXT_SCOPE \
  ContextScope context(this, __FILE__ ":" TOSTRING(__LINE__))

namespace vixl {
namespace aarch32 {

void UseScratchRegisterScope::Open(MacroAssembler* masm) {
  VIXL_ASSERT(masm_ == NULL);
  VIXL_ASSERT(masm != NULL);
  masm_ = masm;

  old_available_ = masm_->GetScratchRegisterList()->GetList();
  old_available_vfp_ = masm_->GetScratchVRegisterList()->GetList();

  parent_ = masm->GetCurrentScratchRegisterScope();
  masm->SetCurrentScratchRegisterScope(this);
}


void UseScratchRegisterScope::Close() {
  if (masm_ != NULL) {
    // Ensure that scopes nest perfectly, and do not outlive their parents.
    // This is a run-time check because the order of destruction of objects in
    // the _same_ scope is implementation-defined, and is likely to change in
    // optimised builds.
    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
    masm_->SetCurrentScratchRegisterScope(parent_);

    masm_->GetScratchRegisterList()->SetList(old_available_);
    masm_->GetScratchVRegisterList()->SetList(old_available_vfp_);

    masm_ = NULL;
  }
}


bool UseScratchRegisterScope::IsAvailable(const Register& reg) const {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  return masm_->GetScratchRegisterList()->Includes(reg);
}


bool UseScratchRegisterScope::IsAvailable(const VRegister& reg) const {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  return masm_->GetScratchVRegisterList()->IncludesAllOf(reg);
}


Register UseScratchRegisterScope::Acquire() {
  VIXL_ASSERT(masm_ != NULL);
  Register reg = masm_->GetScratchRegisterList()->GetFirstAvailableRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchRegisterList()->Remove(reg);
  return reg;
}

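// A minimal usage sketch of this scope (assumed caller code, not part of this
// file): the scope saves the scratch lists in Open() and restores them when it
// is closed or destroyed.
//
//   {
//     UseScratchRegisterScope temps(&masm);
//     Register tmp = temps.Acquire();  // Typically ip with the default list.
//     masm.Mov(tmp, 0xdeadbeef);
//     ...
//   }  // `tmp` becomes available to other users again here.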

VRegister UseScratchRegisterScope::AcquireV(unsigned size_in_bits) {
  switch (size_in_bits) {
    case kSRegSizeInBits:
      return AcquireS();
    case kDRegSizeInBits:
      return AcquireD();
    case kQRegSizeInBits:
      return AcquireQ();
    default:
      VIXL_UNREACHABLE();
      return NoVReg;
  }
}


QRegister UseScratchRegisterScope::AcquireQ() {
  VIXL_ASSERT(masm_ != NULL);
  QRegister reg =
      masm_->GetScratchVRegisterList()->GetFirstAvailableQRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchVRegisterList()->Remove(reg);
  return reg;
}


DRegister UseScratchRegisterScope::AcquireD() {
  VIXL_ASSERT(masm_ != NULL);
  DRegister reg =
      masm_->GetScratchVRegisterList()->GetFirstAvailableDRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchVRegisterList()->Remove(reg);
  return reg;
}


SRegister UseScratchRegisterScope::AcquireS() {
  VIXL_ASSERT(masm_ != NULL);
  SRegister reg =
      masm_->GetScratchVRegisterList()->GetFirstAvailableSRegister();
  VIXL_CHECK(reg.IsValid());
  masm_->GetScratchVRegisterList()->Remove(reg);
  return reg;
}


void UseScratchRegisterScope::Release(const Register& reg) {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  VIXL_ASSERT(!masm_->GetScratchRegisterList()->Includes(reg));
  masm_->GetScratchRegisterList()->Combine(reg);
}


void UseScratchRegisterScope::Release(const VRegister& reg) {
  VIXL_ASSERT(masm_ != NULL);
  VIXL_ASSERT(reg.IsValid());
  VIXL_ASSERT(!masm_->GetScratchVRegisterList()->IncludesAliasOf(reg));
  masm_->GetScratchVRegisterList()->Combine(reg);
}


void UseScratchRegisterScope::Include(const RegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  RegisterList excluded_registers(sp, lr, pc);
  uint32_t mask = list.GetList() & ~excluded_registers.GetList();
  RegisterList* available = masm_->GetScratchRegisterList();
  available->SetList(available->GetList() | mask);
}


void UseScratchRegisterScope::Include(const VRegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  VRegisterList* available = masm_->GetScratchVRegisterList();
  available->SetList(available->GetList() | list.GetList());
}


void UseScratchRegisterScope::Exclude(const RegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  RegisterList* available = masm_->GetScratchRegisterList();
  available->SetList(available->GetList() & ~list.GetList());
}


void UseScratchRegisterScope::Exclude(const VRegisterList& list) {
  VIXL_ASSERT(masm_ != NULL);
  VRegisterList* available = masm_->GetScratchVRegisterList();
  available->SetList(available->GetList() & ~list.GetList());
}


void UseScratchRegisterScope::Exclude(const Operand& operand) {
  if (operand.IsImmediateShiftedRegister()) {
    Exclude(operand.GetBaseRegister());
  } else if (operand.IsRegisterShiftedRegister()) {
    Exclude(operand.GetBaseRegister(), operand.GetShiftRegister());
  } else {
    VIXL_ASSERT(operand.IsImmediate());
  }
}


void UseScratchRegisterScope::ExcludeAll() {
  VIXL_ASSERT(masm_ != NULL);
  masm_->GetScratchRegisterList()->SetList(0);
  masm_->GetScratchVRegisterList()->SetList(0);
}

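// Sketch of how Include/Exclude are typically used (assumed caller code): a
// caller which knows r4 is dead can temporarily offer it as an extra scratch
// register, and a caller about to use ip explicitly can withdraw it.
//
//   UseScratchRegisterScope temps(&masm);
//   temps.Include(RegisterList(r4));
//   temps.Exclude(RegisterList(ip));
//   Register tmp = temps.Acquire();  // r4 is now a candidate, ip is not.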

void VeneerPoolManager::AddLabel(Label* label) {
  if (last_label_reference_offset_ != 0) {
    // If the pool grows faster than the instruction stream, we must adjust
    // the checkpoint to compensate. The veneer pool entries take 32 bits, so
    // this can only occur when two consecutive 16-bit instructions add veneer
    // pool entries.
    // This is typically the case for cbz and cbnz (other forward branches
    // have a 32 bit variant which is always used).
    if (last_label_reference_offset_ + 2 * k16BitT32InstructionSizeInBytes ==
        static_cast<uint32_t>(masm_->GetCursorOffset())) {
      // We found two 16-bit forward branches generated one after the other.
      // That means that the pool will grow by one 32-bit branch while the
      // cursor offset only moves forward by one 16-bit instruction.
      // Update the near checkpoint margin to account for the difference.
      near_checkpoint_margin_ +=
          k32BitT32InstructionSizeInBytes - k16BitT32InstructionSizeInBytes;
    }
  }
  Label::ForwardReference& back = label->GetBackForwardRef();
  VIXL_ASSERT(back.GetMaxForwardDistance() >= kCbzCbnzRange);
  if (!label->IsInVeneerPool()) {
    if (back.GetMaxForwardDistance() <= kNearLabelRange) {
      near_labels_.push_back(label);
      label->SetVeneerPoolManager(this, true);
    } else {
      far_labels_.push_back(label);
      label->SetVeneerPoolManager(this, false);
    }
  } else if (back.GetMaxForwardDistance() <= kNearLabelRange) {
    if (!label->IsNear()) {
      far_labels_.remove(label);
      near_labels_.push_back(label);
      label->SetVeneerPoolManager(this, true);
    }
  }

  back.SetIsBranch();
  last_label_reference_offset_ = back.GetLocation();
  label->UpdateCheckpoint();
  Label::Offset tmp = label->GetCheckpoint();
  if (label->IsNear()) {
    if (near_checkpoint_ > tmp) near_checkpoint_ = tmp;
    if (max_near_checkpoint_ >= tmp) {
      // This checkpoint is before some already in the near list. That means
      // that the veneer (if needed) will be emitted before some of the veneers
      // already in the list. We adjust the margin with the size of a veneer
      // branch.
      near_checkpoint_margin_ += k32BitT32InstructionSizeInBytes;
    } else {
      max_near_checkpoint_ = tmp;
    }
  } else {
    if (far_checkpoint_ > tmp) far_checkpoint_ = tmp;
  }
  // Always recompute the global checkpoint, as adding veneers shortens the
  // literals' checkpoint.
  masm_->ComputeCheckpoint();
}


void VeneerPoolManager::RemoveLabel(Label* label) {
  label->ClearVeneerPoolManager();
  std::list<Label*>& list = label->IsNear() ? near_labels_ : far_labels_;
  Label::Offset* checkpoint_reference =
      label->IsNear() ? &near_checkpoint_ : &far_checkpoint_;
  if (label->GetCheckpoint() == *checkpoint_reference) {
    // We have to recompute the checkpoint.
    *checkpoint_reference = Label::kMaxOffset;
    for (std::list<Label*>::iterator it = list.begin(); it != list.end();) {
      if (*it == label) {
        it = list.erase(it);
      } else {
        *checkpoint_reference =
            std::min(*checkpoint_reference, (*it)->GetCheckpoint());
        ++it;
      }
    }
    masm_->ComputeCheckpoint();
  } else {
    // We only have to remove the label from the list.
    list.remove(label);
  }
}


void VeneerPoolManager::EmitLabel(Label* label, Label::Offset emitted_target) {
  // Define the veneer.
  Label veneer;
  masm_->Bind(&veneer);
  Label::Offset label_checkpoint = Label::kMaxOffset;
  // Check all uses of this label.
  for (Label::ForwardRefList::iterator ref = label->GetFirstForwardRef();
       ref != label->GetEndForwardRef();) {
    if (ref->IsBranch()) {
      if (ref->GetCheckpoint() <= emitted_target) {
        // Use the veneer.
        masm_->EncodeLabelFor(*ref, &veneer);
        ref = label->Erase(ref);
      } else {
        // Don't use the veneer => update checkpoint.
        label_checkpoint = std::min(label_checkpoint, ref->GetCheckpoint());
        ++ref;
      }
    } else {
      ++ref;
    }
  }
  label->SetCheckpoint(label_checkpoint);
  if (label->IsNear()) {
    near_checkpoint_ = std::min(near_checkpoint_, label_checkpoint);
  } else {
    far_checkpoint_ = std::min(far_checkpoint_, label_checkpoint);
  }
  // Generate the veneer.
  masm_->B(label);
}


void VeneerPoolManager::Emit(Label::Offset target) {
  VIXL_ASSERT(!IsBlocked());
  // Sort the labels by checkpoint so that no veneer goes out of range.
  near_labels_.sort(Label::CompareLabels);
  far_labels_.sort(Label::CompareLabels);
  // To avoid too many veneers, generate veneers which will be necessary soon.
  static const size_t kVeneerEmissionMargin = 1 * KBytes;
  // To avoid too many veneers, reuse the veneers generated here for other
  // references which are not too far away.
  static const size_t kVeneerEmittedMargin = 2 * KBytes;
  Label::Offset emitted_target = target + kVeneerEmittedMargin;
  target += kVeneerEmissionMargin;
  // Reset the checkpoints. They will be computed again in the loop.
  near_checkpoint_ = Label::kMaxOffset;
  far_checkpoint_ = Label::kMaxOffset;
  max_near_checkpoint_ = 0;
  near_checkpoint_margin_ = 0;
  for (std::list<Label*>::iterator it = near_labels_.begin();
       it != near_labels_.end();) {
    Label* label = *it;
    // Move the label from the near list to the far list, since the veneer we
    // are about to emit for it is an unconditional (far-range) branch.
    // The label is pushed at the end of the far list. The list remains sorted
    // because the unconditional jump has the biggest range. Even if the items
    // at the end of the list were not sorted, it would not be a problem: they
    // won't be used by this round of generation (their range is greater than
    // kVeneerEmittedMargin).
    it = near_labels_.erase(it);
    far_labels_.push_back(label);
    label->SetVeneerPoolManager(this, false);
    EmitLabel(label, emitted_target);
  }
  for (std::list<Label*>::iterator it = far_labels_.begin();
       it != far_labels_.end();) {
    // The labels are sorted. As soon as a veneer is not needed, we can stop.
    if ((*it)->GetCheckpoint() > target) {
      far_checkpoint_ = std::min(far_checkpoint_, (*it)->GetCheckpoint());
      break;
    }
    // Even if we no longer have use of this label, we can keep it in the list
    // as the next "B" would add it back.
    EmitLabel(*it, emitted_target);
    ++it;
  }
#ifdef VIXL_DEBUG
  for (std::list<Label*>::iterator it = near_labels_.begin();
       it != near_labels_.end();
       ++it) {
    VIXL_ASSERT((*it)->GetCheckpoint() >= near_checkpoint_);
  }
  for (std::list<Label*>::iterator it = far_labels_.begin();
       it != far_labels_.end();
       ++it) {
    VIXL_ASSERT((*it)->GetCheckpoint() >= far_checkpoint_);
  }
#endif
  masm_->ComputeCheckpoint();
}


// We use a subclass to access the protected `ExactAssemblyScope` constructor
// giving us control over the pools, and make the constructor private to limit
// usage to code paths emitting pools.
class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
 private:
  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm,
                                      size_t size,
                                      SizePolicy size_policy = kExactSize)
      : ExactAssemblyScope(masm,
                           size,
                           size_policy,
                           ExactAssemblyScope::kIgnorePools) {}

  friend void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
                                              EmitOption option);

  // TODO: `PerformEnsureEmit` is `private`, so we have to make the
  // `MacroAssembler` a friend.
  friend class MacroAssembler;
};


void MacroAssembler::PerformEnsureEmit(Label::Offset target, uint32_t size) {
  if (!doing_veneer_pool_generation_) {
    EmitOption option = kBranchRequired;
    Label after_pools;
    Label::Offset literal_target = GetTargetForLiteralEmission();
    VIXL_ASSERT(literal_target >= 0);
    bool generate_veneers = target > veneer_pool_manager_.GetCheckpoint();
    if (target > literal_target) {
      // We will generate the literal pool. Generate all the veneers which
      // would become out of range.
      size_t literal_pool_size = literal_pool_manager_.GetLiteralPoolSize() +
                                 kMaxInstructionSizeInBytes;
      VIXL_ASSERT(IsInt32(literal_pool_size));
      Label::Offset veneers_target =
          AlignUp(target + static_cast<Label::Offset>(literal_pool_size), 4);
      VIXL_ASSERT(veneers_target >= 0);
      if (veneers_target > veneer_pool_manager_.GetCheckpoint()) {
        generate_veneers = true;
      }
    }
    if (generate_veneers) {
      {
        ExactAssemblyScopeWithoutPoolsCheck
            guard(this,
                  kMaxInstructionSizeInBytes,
                  ExactAssemblyScope::kMaximumSize);
        b(&after_pools);
      }
      doing_veneer_pool_generation_ = true;
      veneer_pool_manager_.Emit(target);
      doing_veneer_pool_generation_ = false;
      option = kNoBranchRequired;
    }
    // Check if the macro-assembler's internal literal pool should be emitted
    // to avoid any overflow. If we already generated the veneers, we can
    // emit the pool (the branch is already done).
    if ((target > literal_target) || (option == kNoBranchRequired)) {
      EmitLiteralPool(option);
    }
    BindHelper(&after_pools);
  }
  if (GetBuffer()->IsManaged()) {
    bool grow_requested;
    GetBuffer()->EnsureSpaceFor(size, &grow_requested);
    if (grow_requested) ComputeCheckpoint();
  }
}


void MacroAssembler::ComputeCheckpoint() {
  checkpoint_ = AlignDown(std::min(veneer_pool_manager_.GetCheckpoint(),
                                   GetTargetForLiteralEmission()),
                          4);
  size_t buffer_size = GetBuffer()->GetCapacity();
  VIXL_ASSERT(IsInt32(buffer_size));
  Label::Offset buffer_checkpoint = static_cast<Label::Offset>(buffer_size);
  checkpoint_ = std::min(checkpoint_, buffer_checkpoint);
}


void MacroAssembler::EmitLiteralPool(LiteralPool* const literal_pool,
                                     EmitOption option) {
  if (literal_pool->GetSize() > 0) {
#ifdef VIXL_DEBUG
    for (LiteralPool::RawLiteralListIterator literal_it =
             literal_pool->GetFirst();
         literal_it != literal_pool->GetEnd();
         literal_it++) {
      RawLiteral* literal = *literal_it;
      VIXL_ASSERT(GetCursorOffset() < literal->GetCheckpoint());
    }
#endif
    Label after_literal;
    if (option == kBranchRequired) {
      GetBuffer()->EnsureSpaceFor(kMaxInstructionSizeInBytes);
      VIXL_ASSERT(!AllowAssembler());
      {
        ExactAssemblyScopeWithoutPoolsCheck
            guard(this,
                  kMaxInstructionSizeInBytes,
                  ExactAssemblyScope::kMaximumSize);
        b(&after_literal);
      }
    }
    GetBuffer()->Align();
    GetBuffer()->EnsureSpaceFor(literal_pool->GetSize());
    for (LiteralPool::RawLiteralListIterator it = literal_pool->GetFirst();
         it != literal_pool->GetEnd();
         it++) {
      PlaceHelper(*it);
      GetBuffer()->Align();
    }
    if (option == kBranchRequired) BindHelper(&after_literal);
    literal_pool->Clear();
  }
}


void MacroAssembler::Switch(Register reg, JumpTableBase* table) {
  // 32-bit table A32:
  // adr ip, table
  // add ip, ip, r1, lsl 2
  // ldr ip, [ip]
  // jmp: add pc, pc, ip, lsl 2
  // table:
  // .int (case_0 - (jmp + 8)) >> 2
  // .int (case_1 - (jmp + 8)) >> 2
  // .int (case_2 - (jmp + 8)) >> 2

  // 16-bit table T32:
  // adr ip, table
  // jmp: tbh ip, r1
  // table:
  // .short (case_0 - (jmp + 4)) >> 1
  // .short (case_1 - (jmp + 4)) >> 1
  // .short (case_2 - (jmp + 4)) >> 1
  // case_0:
  //   ...
  //   b end_switch
  // case_1:
  //   ...
  //   b end_switch
  // ...
  // end_switch:
  Label jump_table;
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  int table_size = AlignUp(table->GetTableSizeInBytes(), 4);

  // Jump to default if reg is not in [0, table->GetLength()[
  Cmp(reg, table->GetLength());
  B(ge, table->GetDefaultLabel());

  Adr(scratch, &jump_table);
  if (IsUsingA32()) {
    Add(scratch, scratch, Operand(reg, LSL, table->GetOffsetShift()));
    switch (table->GetOffsetShift()) {
      case 0:
        Ldrb(scratch, MemOperand(scratch));
        break;
      case 1:
        Ldrh(scratch, MemOperand(scratch));
        break;
      case 2:
        Ldr(scratch, MemOperand(scratch));
        break;
      default:
        VIXL_ABORT_WITH_MSG("Unsupported jump table size.\n");
    }
    // Emit whatever needs to be emitted if we want to
    // correctly record the position of the branch instruction
    uint32_t branch_location = GetCursorOffset();
    table->SetBranchLocation(branch_location + GetArchitectureStatePCOffset());
    ExactAssemblyScope scope(this,
                             table_size + kA32InstructionSizeInBytes,
                             ExactAssemblyScope::kMaximumSize);
    add(pc, pc, Operand(scratch, LSL, 2));
    VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
    bind(&jump_table);
    GenerateSwitchTable(table, table_size);
  } else {
    // Thumb mode - We have tbb and tbh to do this for 8-bit and 16-bit offsets.
    // But for 32-bit offsets, we use the same code sequence as for A32.
    if (table->GetOffsetShift() == 2) {
      // 32bit offsets
      Add(scratch, scratch, Operand(reg, LSL, 2));
      Ldr(scratch, MemOperand(scratch));
      // Cannot use add pc, pc, r lsl 1 as this is unpredictable in T32,
      // so do the shift beforehand.
      Lsl(scratch, scratch, 1);
      // Emit whatever needs to be emitted if we want to
      // correctly record the position of the branch instruction
      uint32_t branch_location = GetCursorOffset();
      table->SetBranchLocation(branch_location +
                               GetArchitectureStatePCOffset());
      ExactAssemblyScope scope(this,
                               table_size + kMaxInstructionSizeInBytes,
                               ExactAssemblyScope::kMaximumSize);
      add(pc, pc, scratch);
      // add pc, pc, rm fits in 16bit T2 (except for rm = sp)
      VIXL_ASSERT((GetCursorOffset() - branch_location) == 2);
      bind(&jump_table);
      GenerateSwitchTable(table, table_size);
    } else {
      VIXL_ASSERT((table->GetOffsetShift() == 0) ||
                  (table->GetOffsetShift() == 1));
      // Emit whatever needs to be emitted if we want to
      // correctly record the position of the branch instruction
      uint32_t branch_location = GetCursorOffset();
      table->SetBranchLocation(branch_location +
                               GetArchitectureStatePCOffset());
      ExactAssemblyScope scope(this,
                               table_size + kMaxInstructionSizeInBytes,
                               ExactAssemblyScope::kMaximumSize);
      if (table->GetOffsetShift() == 0) {
        // 8bit offsets
        tbb(scratch, reg);
      } else {
        // 16bit offsets
        tbh(scratch, reg);
      }
      // tbb/tbh is a 32bit instruction
      VIXL_ASSERT((GetCursorOffset() - branch_location) == 4);
      bind(&jump_table);
      GenerateSwitchTable(table, table_size);
    }
  }
}


void MacroAssembler::GenerateSwitchTable(JumpTableBase* table, int table_size) {
  table->BindTable(GetCursorOffset());
  for (int i = 0; i < table_size / 4; i++) {
    GetBuffer()->Emit32(0);
  }
}


// switch/case/default : case
// case_index is assumed to be < table->GetLength()
// which is checked in JumpTable::Link and Table::SetPresenceBit
void MacroAssembler::Case(JumpTableBase* table, int case_index) {
  table->Link(this, case_index, GetCursorOffset());
  table->SetPresenceBitForCase(case_index);
}

// switch/case/default : default
void MacroAssembler::Default(JumpTableBase* table) {
  Bind(table->GetDefaultLabel());
}

// switch/case/default : break
void MacroAssembler::Break(JumpTableBase* table) { B(table->GetEndLabel()); }

// switch/case/default : finalize
// Manage the default path, mostly. All empty offsets in the jumptable
// will point to default.
// All values not in [0, table->GetLength()[ are already pointing here anyway.
void MacroAssembler::EndSwitch(JumpTableBase* table) { table->Finalize(this); }
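
// A usage sketch of the switch helpers above (assumed caller code; the
// concrete table type, e.g. JumpTable32bitOffset, comes from the VIXL headers
// and its exact name is assumed here):
//
//   JumpTable32bitOffset table(2);  // A switch with two explicit cases.
//   masm.Switch(r0, &table);
//   masm.Case(&table, 0);
//   // ... code for case 0 ...
//   masm.Break(&table);
//   masm.Case(&table, 1);
//   // ... code for case 1 ...
//   masm.Break(&table);
//   masm.Default(&table);
//   // ... code for the default path ...
//   masm.EndSwitch(&table);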

void MacroAssembler::HandleOutOfBoundsImmediate(Condition cond,
                                                Register tmp,
                                                uint32_t imm) {
  if (IsUintN(16, imm)) {
    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
    mov(cond, tmp, imm & 0xffff);
    return;
  }
  if (IsUsingT32()) {
    if (ImmediateT32::IsImmediateT32(~imm)) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      mvn(cond, tmp, ~imm);
      return;
    }
  } else {
    if (ImmediateA32::IsImmediateA32(~imm)) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      mvn(cond, tmp, ~imm);
      return;
    }
  }
  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
  mov(cond, tmp, imm & 0xffff);
  movt(cond, tmp, imm >> 16);
}

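// For example (a sketch of the expansions above): imm = 0xfffffff0 is not a
// 16-bit value, but ~imm = 0xf is an encodable immediate, so a single
// `mvn tmp, #0xf` is emitted; imm = 0x12345678 falls through to the final
// `mov tmp, #0x5678` + `movt tmp, #0x1234` pair.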

void MacroAssembler::PadToMinimumBranchRange(Label* label) {
  const Label::ForwardReference* last_reference = label->GetForwardRefBack();
  if ((last_reference != NULL) && last_reference->IsUsingT32()) {
    uint32_t location = last_reference->GetLocation();
    if (location + k16BitT32InstructionSizeInBytes ==
        static_cast<uint32_t>(GetCursorOffset())) {
      uint16_t* instr_ptr = buffer_.GetOffsetAddress<uint16_t*>(location);
      if ((instr_ptr[0] & kCbzCbnzMask) == kCbzCbnzValue) {
        VIXL_ASSERT(!InITBlock());
        // A Cbz or a Cbnz cannot branch to the 16-bit slot immediately
        // following it. If the target would be bound immediately after the
        // Cbz or Cbnz, we insert a nop to avoid that.
        EmitT32_16(k16BitT32NopOpcode);
      }
    }
  }
}


MemOperand MacroAssembler::MemOperandComputationHelper(
    Condition cond,
    Register scratch,
    Register base,
    uint32_t offset,
    uint32_t extra_offset_mask) {
  VIXL_ASSERT(!AliasesAvailableScratchRegister(scratch));
  VIXL_ASSERT(!AliasesAvailableScratchRegister(base));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(OutsideITBlock());

  // Check for the simple pass-through case.
  if ((offset & extra_offset_mask) == offset) return MemOperand(base, offset);

  MacroEmissionCheckScope guard(this);
  ITScope it_scope(this, &cond);

  uint32_t load_store_offset = offset & extra_offset_mask;
  uint32_t add_offset = offset & ~extra_offset_mask;
  if ((add_offset != 0) &&
      (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
    load_store_offset = 0;
    add_offset = offset;
  }

  if (base.IsPC()) {
    // Special handling for PC bases. We must read the PC in the first
    // instruction (and only in that instruction), and we must also take care to
    // keep the same address calculation as loads and stores. For T32, that
    // means using something like ADR, which uses AlignDown(PC, 4).

    // We don't handle positive offsets from PC because the intention is not
    // clear; does the user expect the offset from the current
    // GetCursorOffset(), or to allow a certain amount of space after the
    // instruction?
    VIXL_ASSERT((offset & 0x80000000) != 0);
    if (IsUsingT32()) {
      // T32: make the first instruction "SUB (immediate, from PC)" -- an alias
      // of ADR -- to get behaviour like loads and stores. This ADR can handle
      // at least as much offset as the load_store_offset so it can replace it.

      uint32_t sub_pc_offset = (-offset) & 0xfff;
      load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
      add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;

      ExactAssemblyScope scope(this, k32BitT32InstructionSizeInBytes);
      sub(cond, scratch, base, sub_pc_offset);

      if (add_offset == 0) return MemOperand(scratch, load_store_offset);

      // The rest of the offset can be generated in the usual way.
      base = scratch;
    }
    // A32 can use any SUB instruction, so we don't have to do anything special
    // here except to ensure that we read the PC first.
  }

  add(cond, scratch, base, add_offset);
  return MemOperand(scratch, load_store_offset);
}

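// A worked example (sketch) of the splitting above: with base r1,
// offset 0x1234 and extra_offset_mask 0xfff (an A32 `ldr`), neither 0x1234 nor
// -0x1234 is a modified immediate, so the helper emits
// `add scratch, r1, #0x1000` and returns MemOperand(scratch, 0x234).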

uint32_t MacroAssembler::GetOffsetMask(InstructionType type,
                                       AddrMode addrmode) {
  switch (type) {
    case kLdr:
    case kLdrb:
    case kStr:
    case kStrb:
      if (IsUsingA32() || (addrmode == Offset)) {
        return 0xfff;
      } else {
        return 0xff;
      }
    case kLdrsb:
    case kLdrh:
    case kLdrsh:
    case kStrh:
      if (IsUsingT32() && (addrmode == Offset)) {
        return 0xfff;
      } else {
        return 0xff;
      }
    case kVldr:
    case kVstr:
      return 0x3fc;
    case kLdrd:
    case kStrd:
      if (IsUsingA32()) {
        return 0xff;
      } else {
        return 0x3fc;
      }
    default:
      VIXL_UNREACHABLE();
      return 0;
  }
}

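// For instance, GetOffsetMask(kLdr, Offset) is 0xfff (an imm12 offset) in both
// A32 and T32, while the T32 pre/post-indexed forms only absorb 0xff (imm8);
// the remainder of a larger offset is materialised by
// MemOperandComputationHelper above.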

HARDFLOAT void PrintfTrampolineRRRR(
    const char* format, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRRRD(
    const char* format, uint32_t a, uint32_t b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRRDR(
    const char* format, uint32_t a, uint32_t b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRRDD(
    const char* format, uint32_t a, uint32_t b, double c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDRR(
    const char* format, uint32_t a, double b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDRD(
    const char* format, uint32_t a, double b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDDR(
    const char* format, uint32_t a, double b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineRDDD(
    const char* format, uint32_t a, double b, double c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRRR(
    const char* format, double a, uint32_t b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRRD(
    const char* format, double a, uint32_t b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRDR(
    const char* format, double a, uint32_t b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDRDD(
    const char* format, double a, uint32_t b, double c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDRR(
    const char* format, double a, double b, uint32_t c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDRD(
    const char* format, double a, double b, uint32_t c, double d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDDR(
    const char* format, double a, double b, double c, uint32_t d) {
  printf(format, a, b, c, d);
}


HARDFLOAT void PrintfTrampolineDDDD(
    const char* format, double a, double b, double c, double d) {
  printf(format, a, b, c, d);
}


void MacroAssembler::Printf(const char* format,
                            CPURegister reg1,
                            CPURegister reg2,
                            CPURegister reg3,
                            CPURegister reg4) {
  // Exclude all registers from the available scratch registers, so
  // that we are able to use ip below.
  // TODO: Refactor this function to use UseScratchRegisterScope
  // for temporary registers below.
  UseScratchRegisterScope scratch(this);
  scratch.ExcludeAll();
  if (generate_simulator_code_) {
    PushRegister(reg4);
    PushRegister(reg3);
    PushRegister(reg2);
    PushRegister(reg1);
    Push(RegisterList(r0, r1));
    StringLiteral* format_literal =
        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
    Adr(r0, format_literal);
    uint32_t args = (reg4.GetType() << 12) | (reg3.GetType() << 8) |
                    (reg2.GetType() << 4) | reg1.GetType();
    Mov(r1, args);
    Hvc(kPrintfCode);
    Pop(RegisterList(r0, r1));
    int size = reg4.GetRegSizeInBytes() + reg3.GetRegSizeInBytes() +
               reg2.GetRegSizeInBytes() + reg1.GetRegSizeInBytes();
    Drop(size);
  } else {
    // Generate on a native platform => 32 bit environment.
    // Preserve core registers r0-r3, r12, r14
    const uint32_t saved_registers_mask =
        kCallerSavedRegistersMask | (1 << r5.GetCode());
    Push(RegisterList(saved_registers_mask));
    // Push VFP registers.
    Vpush(Untyped64, DRegisterList(d0, 8));
    if (Has32DRegs()) Vpush(Untyped64, DRegisterList(d16, 16));
    // Find a register which has been saved and which doesn't need to be
    // printed.
    RegisterList available_registers(kCallerSavedRegistersMask);
    if (reg1.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg1.GetCode()));
    }
    if (reg2.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg2.GetCode()));
    }
    if (reg3.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg3.GetCode()));
    }
    if (reg4.GetType() == CPURegister::kRRegister) {
      available_registers.Remove(Register(reg4.GetCode()));
    }
    Register tmp = available_registers.GetFirstAvailableRegister();
    VIXL_ASSERT(tmp.GetType() == CPURegister::kRRegister);
    // Push the flags.
    Mrs(tmp, APSR);
    Push(tmp);
    Vmrs(RegisterOrAPSR_nzcv(tmp.GetCode()), FPSCR);
    Push(tmp);
    // Push the registers to print on the stack.
    PushRegister(reg4);
    PushRegister(reg3);
    PushRegister(reg2);
    PushRegister(reg1);
    int core_count = 1;
    int vfp_count = 0;
    uint32_t printf_type = 0;
    // Pop the registers to print and store them into r1-r3 and/or d0-d3.
    // Reg4 may stay on the stack if all the registers to print are core
    // registers.
    PreparePrintfArgument(reg1, &core_count, &vfp_count, &printf_type);
    PreparePrintfArgument(reg2, &core_count, &vfp_count, &printf_type);
    PreparePrintfArgument(reg3, &core_count, &vfp_count, &printf_type);
    PreparePrintfArgument(reg4, &core_count, &vfp_count, &printf_type);
    // Ensure that the stack is aligned on 8 bytes.
    And(r5, sp, 0x7);
    if (core_count == 5) {
      // One 32-bit argument (reg4) has been left on the stack => align the
      // stack before the argument.
      Pop(r0);
      Sub(sp, sp, r5);
      Push(r0);
    } else {
      Sub(sp, sp, r5);
    }
    // Select the right trampoline depending on the arguments.
    uintptr_t address;
    switch (printf_type) {
      case 0:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
        break;
      case 1:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRR);
        break;
      case 2:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRR);
        break;
      case 3:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRR);
        break;
      case 4:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDR);
        break;
      case 5:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDR);
        break;
      case 6:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDR);
        break;
      case 7:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDR);
        break;
      case 8:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRD);
        break;
      case 9:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRRD);
        break;
      case 10:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDRD);
        break;
      case 11:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDRD);
        break;
      case 12:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRDD);
        break;
      case 13:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDRDD);
        break;
      case 14:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRDDD);
        break;
      case 15:
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineDDDD);
        break;
      default:
        VIXL_UNREACHABLE();
        address = reinterpret_cast<uintptr_t>(PrintfTrampolineRRRR);
        break;
    }
    StringLiteral* format_literal =
        new StringLiteral(format, RawLiteral::kDeletedOnPlacementByPool);
    Adr(r0, format_literal);
    Mov(ip, Operand::From(address));
    Blx(ip);
    // If register reg4 was left on the stack => skip it.
    if (core_count == 5) Drop(kRegSizeInBytes);
    // Restore the stack as it was before alignment.
    Add(sp, sp, r5);
    // Restore the flags.
    Pop(tmp);
    Vmsr(FPSCR, tmp);
    Pop(tmp);
    Msr(APSR_nzcvqg, tmp);
    // Restore the registers.
    if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
    Vpop(Untyped64, DRegisterList(d0, 8));
    Pop(RegisterList(saved_registers_mask));
  }
}

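// Usage sketch (assumed caller code, and assuming the remaining register
// parameters default to NoReg in the header): core registers are printed as
// 32-bit values and S/D registers as doubles, so the format string must match.
//
//   masm.Printf("r0=%u d1=%g\n", r0, d1);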

void MacroAssembler::PushRegister(CPURegister reg) {
  switch (reg.GetType()) {
    case CPURegister::kNoRegister:
      break;
    case CPURegister::kRRegister:
      Push(Register(reg.GetCode()));
      break;
    case CPURegister::kSRegister:
      Vpush(Untyped32, SRegisterList(SRegister(reg.GetCode())));
      break;
    case CPURegister::kDRegister:
      Vpush(Untyped64, DRegisterList(DRegister(reg.GetCode())));
      break;
    case CPURegister::kQRegister:
      VIXL_UNIMPLEMENTED();
      break;
  }
}


void MacroAssembler::PreparePrintfArgument(CPURegister reg,
                                           int* core_count,
                                           int* vfp_count,
                                           uint32_t* printf_type) {
  switch (reg.GetType()) {
    case CPURegister::kNoRegister:
      break;
    case CPURegister::kRRegister:
      VIXL_ASSERT(*core_count <= 4);
      if (*core_count < 4) Pop(Register(*core_count));
      *core_count += 1;
      break;
    case CPURegister::kSRegister:
      VIXL_ASSERT(*vfp_count < 4);
      *printf_type |= 1 << (*core_count + *vfp_count - 1);
      Vpop(Untyped32, SRegisterList(SRegister(*vfp_count * 2)));
      Vcvt(F64, F32, DRegister(*vfp_count), SRegister(*vfp_count * 2));
      *vfp_count += 1;
      break;
    case CPURegister::kDRegister:
      VIXL_ASSERT(*vfp_count < 4);
      *printf_type |= 1 << (*core_count + *vfp_count - 1);
      Vpop(Untyped64, DRegisterList(DRegister(*vfp_count)));
      *vfp_count += 1;
      break;
    case CPURegister::kQRegister:
      VIXL_UNIMPLEMENTED();
      break;
  }
}


void MacroAssembler::Delegate(InstructionType type,
                              InstructionCondROp instruction,
                              Condition cond,
                              Register rn,
                              const Operand& operand) {
  VIXL_ASSERT((type == kMovt) || (type == kSxtb16) || (type == kTeq) ||
              (type == kUxtb16));

  if (type == kMovt) {
    VIXL_ABORT_WITH_MSG("`Movt` expects a 16-bit immediate.\n");
  }

  // This delegate only supports teq with immediates.
  CONTEXT_SCOPE;
  if ((type == kTeq) && operand.IsImmediate()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    HandleOutOfBoundsImmediate(cond, scratch, operand.GetImmediate());
    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
    teq(cond, rn, scratch);
    return;
  }
  Assembler::Delegate(type, instruction, cond, rn, operand);
}

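// For example (sketch), `masm.Teq(r0, 0x12345678)` cannot be encoded with a
// modified immediate, so it reaches the delegate above, which materialises the
// constant in a scratch register and then emits `teq r0, scratch`.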

void MacroAssembler::Delegate(InstructionType type,
                              InstructionCondSizeROp instruction,
                              Condition cond,
                              EncodingSize size,
                              Register rn,
                              const Operand& operand) {
  CONTEXT_SCOPE;
  VIXL_ASSERT(size.IsBest());
  VIXL_ASSERT((type == kCmn) || (type == kCmp) || (type == kMov) ||
              (type == kMovs) || (type == kMvn) || (type == kMvns) ||
              (type == kSxtb) || (type == kSxth) || (type == kTst) ||
              (type == kUxtb) || (type == kUxth));
  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
    VIXL_ASSERT((type != kMov) && (type != kMovs));
    InstructionCondRROp shiftop = NULL;
    switch (operand.GetShift().GetType()) {
      case LSL:
        shiftop = &Assembler::lsl;
        break;
      case LSR:
        shiftop = &Assembler::lsr;
        break;
      case ASR:
        shiftop = &Assembler::asr;
        break;
      case RRX:
        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
        VIXL_UNREACHABLE();
        break;
      case ROR:
        shiftop = &Assembler::ror;
        break;
      default:
        VIXL_UNREACHABLE();
    }
    if (shiftop != NULL) {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
      (this->*shiftop)(cond,
                       scratch,
                       operand.GetBaseRegister(),
                       operand.GetShiftRegister());
      (this->*instruction)(cond, size, rn, scratch);
      return;
    }
  }
  if (operand.IsImmediate()) {
    uint32_t imm = operand.GetImmediate();
    switch (type) {
      case kMov:
      case kMovs:
        if (!rn.IsPC()) {
          // Immediate is too large, but not using PC, so handle with mov{t}.
          HandleOutOfBoundsImmediate(cond, rn, imm);
          if (type == kMovs) {
            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
            tst(cond, rn, rn);
          }
          return;
        } else if (type == kMov) {
          VIXL_ASSERT(IsUsingA32() || cond.Is(al));
          // Immediate is too large and using PC, so handle using a temporary
          // register.
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(al, scratch, imm);
          EnsureEmitFor(kMaxInstructionSizeInBytes);
          bx(cond, scratch);
          return;
        }
        break;
      case kCmn:
      case kCmp:
        if (IsUsingA32() || !rn.IsPC()) {
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(cond, scratch, imm);
          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
          (this->*instruction)(cond, size, rn, scratch);
          return;
        }
        break;
      case kMvn:
      case kMvns:
        if (!rn.IsPC()) {
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(cond, scratch, imm);
          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
          (this->*instruction)(cond, size, rn, scratch);
          return;
        }
        break;
      case kTst:
        if (IsUsingA32() || !rn.IsPC()) {
          UseScratchRegisterScope temps(this);
          Register scratch = temps.Acquire();
          HandleOutOfBoundsImmediate(cond, scratch, imm);
          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
          (this->*instruction)(cond, size, rn, scratch);
          return;
        }
        break;
      default:  // kSxtb, Sxth, Uxtb, Uxth
        break;
    }
  }
  Assembler::Delegate(type, instruction, cond, size, rn, operand);
}


void MacroAssembler::Delegate(InstructionType type,
                              InstructionCondRROp instruction,
                              Condition cond,
                              Register rd,
                              Register rn,
                              const Operand& operand) {
  if ((type == kSxtab) || (type == kSxtab16) || (type == kSxtah) ||
      (type == kUxtab) || (type == kUxtab16) || (type == kUxtah) ||
      (type == kPkhbt) || (type == kPkhtb)) {
    UnimplementedDelegate(type);
    return;
  }

  // This delegate only handles the following instructions.
  VIXL_ASSERT((type == kOrn) || (type == kOrns) || (type == kRsc) ||
              (type == kRscs));
  CONTEXT_SCOPE;

  // T32 does not support register-shifted-register operands, so emulate them.
  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
    InstructionCondRROp shiftop = NULL;
    switch (operand.GetShift().GetType()) {
      case LSL:
        shiftop = &Assembler::lsl;
        break;
      case LSR:
        shiftop = &Assembler::lsr;
        break;
      case ASR:
        shiftop = &Assembler::asr;
        break;
      case RRX:
        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
        VIXL_UNREACHABLE();
        break;
      case ROR:
        shiftop = &Assembler::ror;
        break;
      default:
        VIXL_UNREACHABLE();
    }
    if (shiftop != NULL) {
      UseScratchRegisterScope temps(this);
      Register rm = operand.GetBaseRegister();
      Register rs = operand.GetShiftRegister();
      // Try to use rd as a scratch register. We can do this if it aliases rs or
      // rm (because we read them in the first instruction), but not rn.
      if (!rd.Is(rn)) temps.Include(rd);
      Register scratch = temps.Acquire();
      // TODO: The scope length was measured empirically. We should analyse the
      // worst-case size and add targeted tests.
      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
      (this->*shiftop)(cond, scratch, rm, rs);
      (this->*instruction)(cond, rd, rn, scratch);
      return;
    }
  }

  // T32 does not have an Rsc instruction: invert (bitwise NOT) the lhs input
  // and turn it into an Adc. Adc and Rsc are equivalent using a bitwise NOT:
  //   adc rd, rn, operand <-> rsc rd, NOT(rn), operand
  if (IsUsingT32() && ((type == kRsc) || (type == kRscs))) {
    // The RegisterShiftRegister case should have been handled above.
    VIXL_ASSERT(!operand.IsRegisterShiftedRegister());
    UseScratchRegisterScope temps(this);
    // Try to use rd as a scratch register. We can do this if it aliases rn
    // (because we read it in the first instruction), but not rm.
    temps.Include(rd);
    temps.Exclude(operand);
    Register negated_rn = temps.Acquire();
    {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      mvn(cond, negated_rn, rn);
    }
    if (type == kRsc) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      adc(cond, rd, negated_rn, operand);
      return;
    }
    // TODO: We shouldn't have to specify how much space the next instruction
    // needs.
    CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
    adcs(cond, rd, negated_rn, operand);
    return;
  }

  if (operand.IsImmediate()) {
    // If the immediate can be encoded when inverted, turn Orn into Orr.
    // Otherwise rely on HandleOutOfBoundsImmediate to generate a mov or a
    // mov/movt sequence.
    int32_t imm = operand.GetSignedImmediate();
    if (((type == kOrn) || (type == kOrns)) && IsModifiedImmediate(~imm)) {
      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
      switch (type) {
        case kOrn:
          orr(cond, rd, rn, ~imm);
          return;
        case kOrns:
          orrs(cond, rd, rn, ~imm);
          return;
        default:
          VIXL_UNREACHABLE();
          break;
      }
    }
  }

  // A32 does not have an Orn instruction: invert (bitwise NOT) the rhs input
  // and turn it into an Orr.
  if (IsUsingA32() && ((type == kOrn) || (type == kOrns))) {
    // TODO: orn r0, r1, imm -> orr r0, r1, neg(imm) if doable
    //  mvn r0, r2
    //  orr r0, r1, r0
    Register scratch;
    UseScratchRegisterScope temps(this);
    // Try to use rd as a scratch register. We can do this if it aliases the
    // operand's register(s) (because we read them in the first instruction),
    // but not rn.
1355    if (!rd.Is(rn)) temps.Include(rd);
1356    scratch = temps.Acquire();
1357    {
1358      // TODO: We shouldn't have to specify how much space the next instruction
1359      // needs.
1360      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1361      mvn(cond, scratch, operand);
1362    }
1363    if (type == kOrns) {
1364      CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1365      orrs(cond, rd, rn, scratch);
1366      return;
1367    }
1368    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1369    orr(cond, rd, rn, scratch);
1370    return;
1371  }
1372
1373  if (operand.IsImmediate()) {
1374    UseScratchRegisterScope temps(this);
1375    // Allow using the destination as a scratch register if possible.
1376    if (!rd.Is(rn)) temps.Include(rd);
1377    Register scratch = temps.Acquire();
1378    int32_t imm = operand.GetSignedImmediate();
1379    HandleOutOfBoundsImmediate(cond, scratch, imm);
1380    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1381    (this->*instruction)(cond, rd, rn, scratch);
1382    return;
1383  }
1384  Assembler::Delegate(type, instruction, cond, rd, rn, operand);
1385}
1386
1387
1388void MacroAssembler::Delegate(InstructionType type,
1389                              InstructionCondSizeRL instruction,
1390                              Condition cond,
1391                              EncodingSize size,
1392                              Register rd,
1393                              Label* label) {
1394  VIXL_ASSERT((type == kLdr) || (type == kAdr));
1395
1396  CONTEXT_SCOPE;
1397  VIXL_ASSERT(size.IsBest());
1398
1399  if ((type == kLdr) && label->IsBound()) {
1400    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
1401    UseScratchRegisterScope temps(this);
1402    temps.Include(rd);
1403    uint32_t mask = GetOffsetMask(type, Offset);
1404    ldr(rd, MemOperandComputationHelper(cond, temps.Acquire(), label, mask));
1405    return;
1406  }
1407
1408  Assembler::Delegate(type, instruction, cond, size, rd, label);
1409}
1410
1411
1412bool MacroAssembler::GenerateSplitInstruction(
1413    InstructionCondSizeRROp instruction,
1414    Condition cond,
1415    Register rd,
1416    Register rn,
1417    uint32_t imm,
1418    uint32_t mask) {
1419  uint32_t high = imm & ~mask;
1420  if (!IsModifiedImmediate(high) && !rn.IsPC()) return false;
1421  // If high is a modified immediate, we can perform the operation with
1422  // only 2 instructions.
1423  // Else, if rn is PC, we want to avoid moving PC into a temporary.
1424  // Therefore, we also use the pattern even if the second call may
1425  // generate 3 instructions.
1426  uint32_t low = imm & mask;
1427  CodeBufferCheckScope scope(this,
1428                             (rn.IsPC() ? 4 : 2) * kMaxInstructionSizeInBytes);
1429  (this->*instruction)(cond, Best, rd, rn, low);
1430  (this->*instruction)(cond, Best, rd, rd, high);
1431  return true;
1432}
1433
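// Worked example (sketch, T32): `Add(r0, r1, 0x12345)` is not encodable as a
// single add. With mask 0xfff the helper emits `add r0, r1, #0x345` (imm12)
// followed by `add r0, r0, #0x12000` (modified immediate), so the operation
// takes two instructions.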
1434
1435void MacroAssembler::Delegate(InstructionType type,
1436                              InstructionCondSizeRROp instruction,
1437                              Condition cond,
1438                              EncodingSize size,
1439                              Register rd,
1440                              Register rn,
1441                              const Operand& operand) {
1442  VIXL_ASSERT(
1443      (type == kAdc) || (type == kAdcs) || (type == kAdd) || (type == kAdds) ||
1444      (type == kAnd) || (type == kAnds) || (type == kAsr) || (type == kAsrs) ||
1445      (type == kBic) || (type == kBics) || (type == kEor) || (type == kEors) ||
1446      (type == kLsl) || (type == kLsls) || (type == kLsr) || (type == kLsrs) ||
1447      (type == kOrr) || (type == kOrrs) || (type == kRor) || (type == kRors) ||
1448      (type == kRsb) || (type == kRsbs) || (type == kSbc) || (type == kSbcs) ||
1449      (type == kSub) || (type == kSubs));
1450
1451  CONTEXT_SCOPE;
1452  VIXL_ASSERT(size.IsBest());
1453  if (IsUsingT32() && operand.IsRegisterShiftedRegister()) {
1454    InstructionCondRROp shiftop = NULL;
1455    switch (operand.GetShift().GetType()) {
1456      case LSL:
1457        shiftop = &Assembler::lsl;
1458        break;
1459      case LSR:
1460        shiftop = &Assembler::lsr;
1461        break;
1462      case ASR:
1463        shiftop = &Assembler::asr;
1464        break;
1465      case RRX:
1466        // A RegisterShiftedRegister operand cannot have a shift of type RRX.
1467        VIXL_UNREACHABLE();
1468        break;
1469      case ROR:
1470        shiftop = &Assembler::ror;
1471        break;
1472      default:
1473        VIXL_UNREACHABLE();
1474    }
1475    if (shiftop != NULL) {
1476      UseScratchRegisterScope temps(this);
1477      Register rm = operand.GetBaseRegister();
1478      Register rs = operand.GetShiftRegister();
1479      // Try to use rd as a scratch register. We can do this if it aliases rs or
1480      // rm (because we read them in the first instruction), but not rn.
1481      if (!rd.Is(rn)) temps.Include(rd);
1482      Register scratch = temps.Acquire();
1483      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1484      (this->*shiftop)(cond, scratch, rm, rs);
1485      (this->*instruction)(cond, size, rd, rn, scratch);
1486      return;
1487    }
1488  }
1489  if (operand.IsImmediate()) {
1490    int32_t imm = operand.GetSignedImmediate();
1491    if (ImmediateT32::IsImmediateT32(~imm)) {
1492      if (IsUsingT32()) {
1493        switch (type) {
1494          case kOrr:
1495            orn(cond, rd, rn, ~imm);
1496            return;
1497          case kOrrs:
1498            orns(cond, rd, rn, ~imm);
1499            return;
1500          default:
1501            break;
1502        }
1503      }
1504    }
1505    if (imm < 0) {
1506      InstructionCondSizeRROp asmcb = NULL;
1507      // Add and sub are equivalent using an arithmetic negation:
1508      //   add rd, rn, #imm <-> sub rd, rn, - #imm
1509      // Add and sub with carry are equivalent using a bitwise NOT:
1510      //   adc rd, rn, #imm <-> sbc rd, rn, NOT #imm
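          // For example, 'add rd, rn, #-4' is emitted as 'sub rd, rn, #4', and
          // 'adc rd, rn, #-2' as 'sbc rd, rn, #1' (since ~(-2) == 1).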
1511      switch (type) {
1512        case kAdd:
1513          asmcb = &Assembler::sub;
1514          imm = -imm;
1515          break;
1516        case kAdds:
1517          asmcb = &Assembler::subs;
1518          imm = -imm;
1519          break;
1520        case kSub:
1521          asmcb = &Assembler::add;
1522          imm = -imm;
1523          break;
1524        case kSubs:
1525          asmcb = &Assembler::adds;
1526          imm = -imm;
1527          break;
1528        case kAdc:
1529          asmcb = &Assembler::sbc;
1530          imm = ~imm;
1531          break;
1532        case kAdcs:
1533          asmcb = &Assembler::sbcs;
1534          imm = ~imm;
1535          break;
1536        case kSbc:
1537          asmcb = &Assembler::adc;
1538          imm = ~imm;
1539          break;
1540        case kSbcs:
1541          asmcb = &Assembler::adcs;
1542          imm = ~imm;
1543          break;
1544        default:
1545          break;
1546      }
1547      if (asmcb != NULL) {
1548        CodeBufferCheckScope scope(this, 4 * kMaxInstructionSizeInBytes);
1549        (this->*asmcb)(cond, size, rd, rn, Operand(imm));
1550        return;
1551      }
1552    }
1553
1554    // When rn is PC, only handle negative offsets. The correct way to handle
1555    // positive offsets isn't clear; does the user want the offset from the
1556    // start of the macro, or from the end (to allow a certain amount of space)?
1557    // When type is Add or Sub, imm is always positive (imm < 0 has just been
1558    // handled and imm == 0 would have been generated without the need of a
1559    // delegate). Therefore, only add to PC is forbidden here.
1560    if ((((type == kAdd) && !rn.IsPC()) || (type == kSub)) &&
1561        (IsUsingA32() || (!rd.IsPC() && !rn.IsPC()))) {
1562      VIXL_ASSERT(imm > 0);
1563      // Try to break the constant into two modified immediates.
1564      // For T32 also try to break the constant into one imm12 and one modified
1565      // immediate. Count the trailing zeroes and get the biggest even value.
1566      int trailing_zeroes = CountTrailingZeros(imm) & ~1u;
1567      uint32_t mask = ((trailing_zeroes < 4) && IsUsingT32())
1568                          ? 0xfff
1569                          : (0xff << trailing_zeroes);
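          // (On T32 the low part may use the plain 12-bit immediate encoding
          // of add/sub, e.g. 0x00ab0123 splits into 0x123 and 0x00ab0000; on
          // A32 it is limited to an 8-bit field aligned on the trailing
          // zeroes.)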
1570      if (GenerateSplitInstruction(instruction, cond, rd, rn, imm, mask)) {
1571        return;
1572      }
1573      InstructionCondSizeRROp asmcb = NULL;
1574      switch (type) {
1575        case kAdd:
1576          asmcb = &Assembler::sub;
1577          break;
1578        case kSub:
1579          asmcb = &Assembler::add;
1580          break;
1581        default:
1582          VIXL_UNREACHABLE();
1583      }
1584      if (GenerateSplitInstruction(asmcb, cond, rd, rn, -imm, mask)) {
1585        return;
1586      }
1587    }
1588
1589    UseScratchRegisterScope temps(this);
1590    // Allow using the destination as a scratch register if possible.
1591    if (!rd.Is(rn)) temps.Include(rd);
1592    if (rn.IsPC()) {
1593      // If we're reading the PC, we need to do it in the first instruction,
1594      // otherwise we'll read the wrong value. We rely on this to handle the
1595      // long-range PC-relative MemOperands which can result from user-managed
1596      // literals.
1597
1598      // Only handle negative offsets. The correct way to handle positive
1599      // offsets isn't clear; does the user want the offset from the start of
1600      // the macro, or from the end (to allow a certain amount of space)?
1601      bool offset_is_negative_or_zero = (imm <= 0);
1602      switch (type) {
1603        case kAdd:
1604        case kAdds:
1605          offset_is_negative_or_zero = (imm <= 0);
1606          break;
1607        case kSub:
1608        case kSubs:
1609          offset_is_negative_or_zero = (imm >= 0);
1610          break;
1611        case kAdc:
1612        case kAdcs:
1613          offset_is_negative_or_zero = (imm < 0);
1614          break;
1615        case kSbc:
1616        case kSbcs:
1617          offset_is_negative_or_zero = (imm > 0);
1618          break;
1619        default:
1620          break;
1621      }
1622      if (offset_is_negative_or_zero) {
1623        {
1624          rn = temps.Acquire();
1625          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1626          mov(cond, rn, pc);
1627        }
1628        // Recurse rather than falling through, to try to get the immediate into
1629        // a single instruction.
1630        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1631        (this->*instruction)(cond, size, rd, rn, operand);
1632        return;
1633      }
1634    } else {
1635      Register scratch = temps.Acquire();
1636      // TODO: The scope length was measured empirically. We should analyse the
1637      // worst-case size and add targeted tests.
1638      CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1639      mov(cond, scratch, operand.GetImmediate());
1640      (this->*instruction)(cond, size, rd, rn, scratch);
1641      return;
1642    }
1643  }
1644  Assembler::Delegate(type, instruction, cond, size, rd, rn, operand);
1645}
1646
1647
1648void MacroAssembler::Delegate(InstructionType type,
1649                              InstructionRL instruction,
1650                              Register rn,
1651                              Label* label) {
1652  VIXL_ASSERT((type == kCbz) || (type == kCbnz));
1653
1654  CONTEXT_SCOPE;
1655  CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1656  if (IsUsingA32()) {
1657    if (type == kCbz) {
1658      VIXL_ABORT_WITH_MSG("Cbz is only available for T32.\n");
1659    } else {
1660      VIXL_ABORT_WITH_MSG("Cbnz is only available for T32.\n");
1661    }
1662  } else if (rn.IsLow()) {
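        // cbz/cbnz can only branch forwards over a small range; when the
        // assembler delegates here (typically because the label is out of
        // range or behind the current position), invert the test and branch
        // over an unconditional b, which has a much greater range.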
1663    switch (type) {
1664      case kCbnz: {
1665        Label done;
1666        cbz(rn, &done);
1667        b(label);
1668        Bind(&done);
1669        return;
1670      }
1671      case kCbz: {
1672        Label done;
1673        cbnz(rn, &done);
1674        b(label);
1675        Bind(&done);
1676        return;
1677      }
1678      default:
1679        break;
1680    }
1681  }
1682  Assembler::Delegate(type, instruction, rn, label);
1683}
1684
1685
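    // Returns true if every byte of imm is either 0x00 or 0xff, i.e. if imm
    // matches the byte-mask pattern accepted by the I64 immediate form of vmov.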
1686template <typename T>
1687static inline bool IsI64BitPattern(T imm) {
1688  for (T mask = 0xff << ((sizeof(T) - 1) * 8); mask != 0; mask >>= 8) {
1689    if (((imm & mask) != mask) && ((imm & mask) != 0)) return false;
1690  }
1691  return true;
1692}
1693
1694
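    // Returns true if all bytes of imm are equal, so the value can be
    // materialised with a single 'vmov.i8' of that byte.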
1695template <typename T>
1696static inline bool IsI8BitPattern(T imm) {
1697  uint8_t imm8 = imm & 0xff;
1698  for (unsigned rep = sizeof(T) - 1; rep > 0; rep--) {
1699    imm >>= 8;
1700    if ((imm & 0xff) != imm8) return false;
1701  }
1702  return true;
1703}
1704
1705
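    // Helper for the vmov delegates below: returns true when
    // 'vmov.i32 rd, imm32' can instead be emitted as 'vmvn.i32 rd, ~imm32',
    // i.e. when the bitwise NOT of imm32 is expected to fit one of the I32
    // immediate encodings.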
1706static inline bool CanBeInverted(uint32_t imm32) {
1707  uint32_t fill8 = 0;
1708
1709  if ((imm32 & 0xffffff00) == 0xffffff00) {
1710    //    11111111 11111111 11111111 abcdefgh
1711    return true;
1712  }
1713  if (((imm32 & 0xff) == 0) || ((imm32 & 0xff) == 0xff)) {
1714    fill8 = imm32 & 0xff;
1715    imm32 >>= 8;
1716    if ((imm32 >> 8) == 0xffff) {
1717      //    11111111 11111111 abcdefgh 00000000
1718      // or 11111111 11111111 abcdefgh 11111111
1719      return true;
1720    }
1721    if ((imm32 & 0xff) == fill8) {
1722      imm32 >>= 8;
1723      if ((imm32 >> 8) == 0xff) {
1724        //    11111111 abcdefgh 00000000 00000000
1725        // or 11111111 abcdefgh 11111111 11111111
1726        return true;
1727      }
1728      if ((fill8 == 0xff) && ((imm32 & 0xff) == 0xff)) {
1729        //    abcdefgh 11111111 11111111 11111111
1730        return true;
1731      }
1732    }
1733  }
1734  return false;
1735}
1736
1737
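    // Replicates imm across the wider result type RES, e.g.
    // replicate<uint64_t>(UINT32_C(0xff0000ff)) is UINT64_C(0xff0000ffff0000ff).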
1738template <typename RES, typename T>
1739static inline RES replicate(T imm) {
1740  VIXL_ASSERT((sizeof(RES) > sizeof(T)) &&
1741              (((sizeof(RES) / sizeof(T)) * sizeof(T)) == sizeof(RES)));
1742  RES res = imm;
1743  for (unsigned i = sizeof(RES) / sizeof(T) - 1; i > 0; i--) {
1744    res = (res << (sizeof(T) * 8)) | imm;
1745  }
1746  return res;
1747}
1748
1749
1750void MacroAssembler::Delegate(InstructionType type,
1751                              InstructionCondDtSSop instruction,
1752                              Condition cond,
1753                              DataType dt,
1754                              SRegister rd,
1755                              const SOperand& operand) {
1756  CONTEXT_SCOPE;
1757  if (type == kVmov) {
1758    if (operand.IsImmediate() && dt.Is(F32)) {
1759      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1760      if (neon_imm.CanConvert<float>()) {
1761        // movw ip, imm16
1762        // movt ip, imm16
1763        // vmov s0, ip
1764        UseScratchRegisterScope temps(this);
1765        Register scratch = temps.Acquire();
1766        float f = neon_imm.GetImmediate<float>();
1767        // TODO: The scope length was measured empirically. We should analyse
1768        // the worst-case size and add targeted tests.
1770        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1771        mov(cond, scratch, FloatToRawbits(f));
1772        vmov(cond, rd, scratch);
1773        return;
1774      }
1775    }
1776  }
1777  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1778}
1779
1780
1781void MacroAssembler::Delegate(InstructionType type,
1782                              InstructionCondDtDDop instruction,
1783                              Condition cond,
1784                              DataType dt,
1785                              DRegister rd,
1786                              const DOperand& operand) {
1787  CONTEXT_SCOPE;
1788  if (type == kVmov) {
1789    if (operand.IsImmediate()) {
1790      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1791      switch (dt.GetValue()) {
1792        case I32:
1793          if (neon_imm.CanConvert<uint32_t>()) {
1794            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1795            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1796            if (IsI8BitPattern(imm)) {
1797              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1798              vmov(cond, I8, rd, imm & 0xff);
1799              return;
1800            }
1801            // vmov.i32 d0, 0xff0000ff will translate into
1802            // vmov.i64 d0, 0xff0000ffff0000ff
1803            if (IsI64BitPattern(imm)) {
1804              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1805              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1806              return;
1807            }
1808            // vmov.i32 d0, 0xffab0000 will translate into
1809            // vmvn.i32 d0, 0x0054ffff
1810            if (cond.Is(al) && CanBeInverted(imm)) {
1811              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1812              vmvn(I32, rd, ~imm);
1813              return;
1814            }
1815          }
1816          break;
1817        case I16:
1818          if (neon_imm.CanConvert<uint16_t>()) {
1819            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1820            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1821            if (IsI8BitPattern(imm)) {
1822              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1823              vmov(cond, I8, rd, imm & 0xff);
1824              return;
1825            }
1826          }
1827          break;
1828        case I64:
1829          if (neon_imm.CanConvert<uint64_t>()) {
1830            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1831            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1832            if (IsI8BitPattern(imm)) {
1833              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1834              vmov(cond, I8, rd, imm & 0xff);
1835              return;
1836            }
1837            // mov ip, lo(imm64)
1838            // vdup d0, ip
1839            // vdup is preferred to 'vmov d0[0]' as d0[1] does not need to be
1840            // preserved
1841            {
1842              UseScratchRegisterScope temps(this);
1843              Register scratch = temps.Acquire();
1844              {
1845                // TODO: The scope length was measured empirically. We should
1846                // analyse the worst-case size and add targeted tests.
1848                CodeBufferCheckScope scope(this,
1849                                           2 * kMaxInstructionSizeInBytes);
1850                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1851              }
1852              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1853              vdup(cond, Untyped32, rd, scratch);
1854            }
1855            // mov ip, hi(imm64)
1856            // vmov d0[1], ip
1857            {
1858              UseScratchRegisterScope temps(this);
1859              Register scratch = temps.Acquire();
1860              {
1861                // TODO: The scope length was measured empirically. We should
1862                // analyse the worst-case size and add targeted tests.
1864                CodeBufferCheckScope scope(this,
1865                                           2 * kMaxInstructionSizeInBytes);
1866                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
1867              }
1868              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1869              vmov(cond, Untyped32, DRegisterLane(rd, 1), scratch);
1870            }
1871            return;
1872          }
1873          break;
1874        default:
1875          break;
1876      }
1877      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
1878      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
1879        // mov ip, imm32
1880        // vdup.16 d0, ip
1881        UseScratchRegisterScope temps(this);
1882        Register scratch = temps.Acquire();
1883        {
1884          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
1885          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
1886        }
1887        DataTypeValue vdup_dt = Untyped32;
1888        switch (dt.GetValue()) {
1889          case I16:
1890            vdup_dt = Untyped16;
1891            break;
1892          case I32:
1893            vdup_dt = Untyped32;
1894            break;
1895          default:
1896            VIXL_UNREACHABLE();
1897        }
1898        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1899        vdup(cond, vdup_dt, rd, scratch);
1900        return;
1901      }
1902      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
1903        float f = neon_imm.GetImmediate<float>();
1904        // Punt to vmov.i32
1905        // TODO: The scope length was guessed based on the double case below. We
1906        // should analyse the worst-case size and add targeted tests.
1907        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
1908        vmov(cond, I32, rd, FloatToRawbits(f));
1909        return;
1910      }
1911      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
1912        // Punt to vmov.i64
1913        double d = neon_imm.GetImmediate<double>();
1914        // TODO: The scope length was measured empirically. We should analyse
1915        // the worst-case size and add targeted tests.
1917        CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
1918        vmov(cond, I64, rd, DoubleToRawbits(d));
1919        return;
1920      }
1921    }
1922  }
1923  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
1924}
1925
1926
1927void MacroAssembler::Delegate(InstructionType type,
1928                              InstructionCondDtQQop instruction,
1929                              Condition cond,
1930                              DataType dt,
1931                              QRegister rd,
1932                              const QOperand& operand) {
1933  CONTEXT_SCOPE;
1934  if (type == kVmov) {
1935    if (operand.IsImmediate()) {
1936      const NeonImmediate& neon_imm = operand.GetNeonImmediate();
1937      switch (dt.GetValue()) {
1938        case I32:
1939          if (neon_imm.CanConvert<uint32_t>()) {
1940            uint32_t imm = neon_imm.GetImmediate<uint32_t>();
1941            // vmov.i32 d0, 0xabababab will translate into vmov.i8 d0, 0xab
1942            if (IsI8BitPattern(imm)) {
1943              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1944              vmov(cond, I8, rd, imm & 0xff);
1945              return;
1946            }
1947            // vmov.i32 d0, 0xff0000ff will translate into
1948            // vmov.i64 d0, 0xff0000ffff0000ff
1949            if (IsI64BitPattern(imm)) {
1950              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1951              vmov(cond, I64, rd, replicate<uint64_t>(imm));
1952              return;
1953            }
1954            // vmov.i32 d0, 0xffab0000 will translate into
1955            // vmvn.i32 d0, 0x0054ffff
1956            if (CanBeInverted(imm)) {
1957              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1958              vmvn(cond, I32, rd, ~imm);
1959              return;
1960            }
1961          }
1962          break;
1963        case I16:
1964          if (neon_imm.CanConvert<uint16_t>()) {
1965            uint16_t imm = neon_imm.GetImmediate<uint16_t>();
1966            // vmov.i16 d0, 0xabab will translate into vmov.i8 d0, 0xab
1967            if (IsI8BitPattern(imm)) {
1968              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1969              vmov(cond, I8, rd, imm & 0xff);
1970              return;
1971            }
1972          }
1973          break;
1974        case I64:
1975          if (neon_imm.CanConvert<uint64_t>()) {
1976            uint64_t imm = neon_imm.GetImmediate<uint64_t>();
1977            // vmov.i64 d0, -1 will translate into vmov.i8 d0, 0xff
1978            if (IsI8BitPattern(imm)) {
1979              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1980              vmov(cond, I8, rd, imm & 0xff);
1981              return;
1982            }
1983            // mov ip, lo(imm64)
1984            // vdup q0, ip
1985            // vdup is preferred to 'vmov d0[0]' as the other lanes of q0 do
1986            // not need to be preserved
1987            {
1988              UseScratchRegisterScope temps(this);
1989              Register scratch = temps.Acquire();
1990              {
1991                CodeBufferCheckScope scope(this,
1992                                           2 * kMaxInstructionSizeInBytes);
1993                mov(cond, scratch, static_cast<uint32_t>(imm & 0xffffffff));
1994              }
1995              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
1996              vdup(cond, Untyped32, rd, scratch);
1997            }
1998            // mov ip, hi(imm64)
1999            // vmov.32 d0[1], ip
2000            // vmov d1, d0
2001            {
2002              UseScratchRegisterScope temps(this);
2003              Register scratch = temps.Acquire();
2004              {
2005                CodeBufferCheckScope scope(this,
2006                                           2 * kMaxInstructionSizeInBytes);
2007                mov(cond, scratch, static_cast<uint32_t>(imm >> 32));
2008              }
2009              {
2010                CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2011                vmov(cond,
2012                     Untyped32,
2013                     DRegisterLane(rd.GetLowDRegister(), 1),
2014                     scratch);
2015              }
2016              CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2017              vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
2018            }
2019            return;
2020          }
2021          break;
2022        default:
2023          break;
2024      }
2025      VIXL_ASSERT(!dt.Is(I8));  // I8 cases should have been handled already.
2026      if ((dt.Is(I16) || dt.Is(I32)) && neon_imm.CanConvert<uint32_t>()) {
2027        // mov ip, imm32
2028        // vdup.16 q0, ip
2029        UseScratchRegisterScope temps(this);
2030        Register scratch = temps.Acquire();
2031        {
2032          CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2033          mov(cond, scratch, neon_imm.GetImmediate<uint32_t>());
2034        }
2035        DataTypeValue vdup_dt = Untyped32;
2036        switch (dt.GetValue()) {
2037          case I16:
2038            vdup_dt = Untyped16;
2039            break;
2040          case I32:
2041            vdup_dt = Untyped32;
2042            break;
2043          default:
2044            VIXL_UNREACHABLE();
2045        }
2046        CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2047        vdup(cond, vdup_dt, rd, scratch);
2048        return;
2049      }
2050      if (dt.Is(F32) && neon_imm.CanConvert<float>()) {
2051        // Punt to vmov.i32
2052        float f = neon_imm.GetImmediate<float>();
2053        CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2054        vmov(cond, I32, rd, FloatToRawbits(f));
2055        return;
2056      }
2057      if (dt.Is(F64) && neon_imm.CanConvert<double>()) {
2058        // Use vmov to create the double in the low D register, then duplicate
2059        // it into the high D register.
2060        double d = neon_imm.GetImmediate<double>();
2061        CodeBufferCheckScope scope(this, 7 * kMaxInstructionSizeInBytes);
2062        vmov(cond, F64, rd.GetLowDRegister(), d);
2063        vmov(cond, F64, rd.GetHighDRegister(), rd.GetLowDRegister());
2064        return;
2065      }
2066    }
2067  }
2068  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2069}
2070
2071
2072void MacroAssembler::Delegate(InstructionType type,
2073                              InstructionCondRL instruction,
2074                              Condition cond,
2075                              Register rt,
2076                              Label* label) {
2077  VIXL_ASSERT((type == kLdrb) || (type == kLdrh) || (type == kLdrsb) ||
2078              (type == kLdrsh));
2079
2080  CONTEXT_SCOPE;
2081
2082  if (label->IsBound()) {
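        // The label is already bound, so its address is known here:
        // MemOperandComputationHelper materialises an address in the scratch
        // register (rt itself may be acquired for this) and the load then uses
        // an offset that the addressing mode can encode (hence the mask).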
2083    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2084    UseScratchRegisterScope temps(this);
2085    temps.Include(rt);
2086    Register scratch = temps.Acquire();
2087    uint32_t mask = GetOffsetMask(type, Offset);
2088    switch (type) {
2089      case kLdrb:
2090        ldrb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2091        return;
2092      case kLdrh:
2093        ldrh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2094        return;
2095      case kLdrsb:
2096        ldrsb(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2097        return;
2098      case kLdrsh:
2099        ldrsh(rt, MemOperandComputationHelper(cond, scratch, label, mask));
2100        return;
2101      default:
2102        VIXL_UNREACHABLE();
2103    }
2104    return;
2105  }
2106
2107  Assembler::Delegate(type, instruction, cond, rt, label);
2108}
2109
2110
2111void MacroAssembler::Delegate(InstructionType type,
2112                              InstructionCondRRL instruction,
2113                              Condition cond,
2114                              Register rt,
2115                              Register rt2,
2116                              Label* label) {
2117  VIXL_ASSERT(type == kLdrd);
2118
2119  CONTEXT_SCOPE;
2120
2121  if (label->IsBound()) {
2122    CodeBufferCheckScope scope(this, 6 * kMaxInstructionSizeInBytes);
2123    UseScratchRegisterScope temps(this);
2124    temps.Include(rt, rt2);
2125    Register scratch = temps.Acquire();
2126    uint32_t mask = GetOffsetMask(type, Offset);
2127    ldrd(rt, rt2, MemOperandComputationHelper(cond, scratch, label, mask));
2128    return;
2129  }
2130
2131  Assembler::Delegate(type, instruction, cond, rt, rt2, label);
2132}
2133
2134
2135void MacroAssembler::Delegate(InstructionType type,
2136                              InstructionCondSizeRMop instruction,
2137                              Condition cond,
2138                              EncodingSize size,
2139                              Register rd,
2140                              const MemOperand& operand) {
2141  CONTEXT_SCOPE;
2142  VIXL_ASSERT(size.IsBest());
2143  VIXL_ASSERT((type == kLdr) || (type == kLdrb) || (type == kLdrh) ||
2144              (type == kLdrsb) || (type == kLdrsh) || (type == kStr) ||
2145              (type == kStrb) || (type == kStrh));
2146  if (operand.IsImmediate()) {
2147    const Register& rn = operand.GetBaseRegister();
2148    AddrMode addrmode = operand.GetAddrMode();
2149    int32_t offset = operand.GetOffsetImmediate();
2150    uint32_t extra_offset_mask = GetOffsetMask(type, addrmode);
2151    // Try to maximize the offset used by the MemOperand (load_store_offset).
2152    // Add the part which can't be used by the MemOperand (add_offset).
2153    uint32_t load_store_offset = offset & extra_offset_mask;
2154    uint32_t add_offset = offset & ~extra_offset_mask;
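        // If the entire offset is itself a modified immediate (or its negation
        // is), do not split it: put it all into the 'add' below and use a zero
        // offset in the MemOperand.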
2155    if ((add_offset != 0) &&
2156        (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
2157      load_store_offset = 0;
2158      add_offset = offset;
2159    }
2160    switch (addrmode) {
2161      case PreIndex:
2162        // Avoid the unpredictable case 'str r0, [r0, imm]!'
2163        if (!rn.Is(rd)) {
2164          // Pre-Indexed case:
2165          // ldr r0, [r1, 12345]! will translate into
2166          //   add r1, r1, 12345
2167          //   ldr r0, [r1]
2168          {
2169            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2170            add(cond, rn, rn, add_offset);
2171          }
2172          {
2173            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2174            (this->*instruction)(cond,
2175                                 size,
2176                                 rd,
2177                                 MemOperand(rn, load_store_offset, PreIndex));
2178          }
2179          return;
2180        }
2181        break;
2182      case Offset: {
2183        UseScratchRegisterScope temps(this);
2184        // Allow using the destination as a scratch register if possible.
2185        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2186            !rd.Is(rn)) {
2187          temps.Include(rd);
2188        }
2189        Register scratch = temps.Acquire();
2190        // Offset case:
2191        // ldr r0, [r1, 12345] will translate into
2192        //   add r0, r1, 12345
2193        //   ldr r0, [r0]
2194        {
2195          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2196          add(cond, scratch, rn, add_offset);
2197        }
2198        {
2199          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2200          (this->*instruction)(cond,
2201                               size,
2202                               rd,
2203                               MemOperand(scratch, load_store_offset));
2204        }
2205        return;
2206      }
2207      case PostIndex:
2208        // Avoid the unpredictable case 'ldr r0, [r0], imm'
2209        if (!rn.Is(rd)) {
2210          // Post-indexed case:
2211          // ldr r0, [r1], imm32 will translate into
2212          //   ldr r0, [r1]
2213          //   movw ip, imm32 & 0xffff
2214          //   movt ip, imm32 >> 16
2215          //   add r1, r1, ip
2216          {
2217            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2218            (this->*instruction)(cond,
2219                                 size,
2220                                 rd,
2221                                 MemOperand(rn, load_store_offset, PostIndex));
2222          }
2223          {
2224            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2225            add(cond, rn, rn, add_offset);
2226          }
2227          return;
2228        }
2229        break;
2230    }
2231  } else if (operand.IsPlainRegister()) {
2232    const Register& rn = operand.GetBaseRegister();
2233    AddrMode addrmode = operand.GetAddrMode();
2234    const Register& rm = operand.GetOffsetRegister();
2235    if (rm.IsPC()) {
2236      VIXL_ABORT_WITH_MSG(
2237          "The MacroAssembler does not convert loads and stores with a PC "
2238          "offset register.\n");
2239    }
2240    if (rn.IsPC()) {
2241      if (addrmode == Offset) {
2242        if (IsUsingT32()) {
2243          VIXL_ABORT_WITH_MSG(
2244              "The MacroAssembler does not convert loads and stores with a PC "
2245              "base register for T32.\n");
2246        }
2247      } else {
2248        VIXL_ABORT_WITH_MSG(
2249            "The MacroAssembler does not convert loads and stores with a PC "
2250            "base register in pre-index or post-index mode.\n");
2251      }
2252    }
2253    switch (addrmode) {
2254      case PreIndex:
2255        // Avoid the unpredictable case 'str r0, [r0, imm]!'
2256        if (!rn.Is(rd)) {
2257          // Pre-Indexed case:
2258          // ldr r0, [r1, r2]! will translate into
2259          //   add r1, r1, r2
2260          //   ldr r0, [r1]
2261          {
2262            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2263            if (operand.GetSign().IsPlus()) {
2264              add(cond, rn, rn, rm);
2265            } else {
2266              sub(cond, rn, rn, rm);
2267            }
2268          }
2269          {
2270            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2271            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2272          }
2273          return;
2274        }
2275        break;
2276      case Offset: {
2277        UseScratchRegisterScope temps(this);
2278        // Allow using the destination as a scratch register if this is not a
2279        // store.
2280        // Avoid using PC as a temporary as this has side-effects.
2281        if ((type != kStr) && (type != kStrb) && (type != kStrh) &&
2282            !rd.IsPC()) {
2283          temps.Include(rd);
2284        }
2285        Register scratch = temps.Acquire();
2286        // Offset case:
2287        // ldr r0, [r1, r2] will translate into
2288        //   add r0, r1, r2
2289        //   ldr r0, [r0]
2290        {
2291          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2292          if (operand.GetSign().IsPlus()) {
2293            add(cond, scratch, rn, rm);
2294          } else {
2295            sub(cond, scratch, rn, rm);
2296          }
2297        }
2298        {
2299          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2300          (this->*instruction)(cond, size, rd, MemOperand(scratch, Offset));
2301        }
2302        return;
2303      }
2304      case PostIndex:
2305        // Avoid the unpredictable case 'ldr r0, [r0], imm'
2306        if (!rn.Is(rd)) {
2307          // Post-indexed case:
2308          // ldr r0, [r1], r2 will translate into
2309          //   ldr r0, [r1]
2310          //   add r1, r1, r2
2311          {
2312            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2313            (this->*instruction)(cond, size, rd, MemOperand(rn, Offset));
2314          }
2315          {
2316            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2317            if (operand.GetSign().IsPlus()) {
2318              add(cond, rn, rn, rm);
2319            } else {
2320              sub(cond, rn, rn, rm);
2321            }
2322          }
2323          return;
2324        }
2325        break;
2326    }
2327  }
2328  Assembler::Delegate(type, instruction, cond, size, rd, operand);
2329}
2330
2331
2332void MacroAssembler::Delegate(InstructionType type,
2333                              InstructionCondRRMop instruction,
2334                              Condition cond,
2335                              Register rt,
2336                              Register rt2,
2337                              const MemOperand& operand) {
2338  if ((type == kLdaexd) || (type == kLdrexd) || (type == kStlex) ||
2339      (type == kStlexb) || (type == kStlexh) || (type == kStrex) ||
2340      (type == kStrexb) || (type == kStrexh)) {
2341    UnimplementedDelegate(type);
2342    return;
2343  }
2344
2345  VIXL_ASSERT((type == kLdrd) || (type == kStrd));
2346
2347  CONTEXT_SCOPE;
2348
2349  // TODO: Should we allow these cases?
2350  if (IsUsingA32()) {
2351    // The first register needs to be even.
2352    if ((rt.GetCode() & 1) != 0) {
2353      UnimplementedDelegate(type);
2354      return;
2355    }
2356    // Registers need to be adjacent.
2357    if (((rt.GetCode() + 1) % kNumberOfRegisters) != rt2.GetCode()) {
2358      UnimplementedDelegate(type);
2359      return;
2360    }
2361    // LDRD lr, pc, [...] is not allowed.
2362    if (rt.Is(lr)) {
2363      UnimplementedDelegate(type);
2364      return;
2365    }
2366  }
2367
2368  if (operand.IsImmediate()) {
2369    const Register& rn = operand.GetBaseRegister();
2370    AddrMode addrmode = operand.GetAddrMode();
2371    int32_t offset = operand.GetOffsetImmediate();
2372    uint32_t extra_offset_mask = GetOffsetMask(type, addrmode);
2373    // Try to maximize the offset used by the MemOperand (load_store_offset).
2374    // Add the part which can't be used by the MemOperand (add_offset).
2375    uint32_t load_store_offset = offset & extra_offset_mask;
2376    uint32_t add_offset = offset & ~extra_offset_mask;
2377    if ((add_offset != 0) &&
2378        (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
2379      load_store_offset = 0;
2380      add_offset = offset;
2381    }
2382    switch (addrmode) {
2383      case PreIndex: {
2384        // Allow using the destinations as scratch registers if possible.
2385        UseScratchRegisterScope temps(this);
2386        if (type == kLdrd) {
2387          if (!rt.Is(rn)) temps.Include(rt);
2388          if (!rt2.Is(rn)) temps.Include(rt2);
2389        }
2390
2391        // Pre-Indexed case:
2392        // ldrd r0, r1, [r2, 12345]! will translate into
2393        //   add r2, 12345
2394        //   ldrd r0, r1, [r2]
2395        {
2396          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2397          add(cond, rn, rn, add_offset);
2398        }
2399        {
2400          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2401          (this->*instruction)(cond,
2402                               rt,
2403                               rt2,
2404                               MemOperand(rn, load_store_offset, PreIndex));
2405        }
2406        return;
2407      }
2408      case Offset: {
2409        UseScratchRegisterScope temps(this);
2410        // Allow using the destinations as scratch registers if possible.
2411        if (type == kLdrd) {
2412          if (!rt.Is(rn)) temps.Include(rt);
2413          if (!rt2.Is(rn)) temps.Include(rt2);
2414        }
2415        Register scratch = temps.Acquire();
2416        // Offset case:
2417        // ldrd r0, r1, [r2, 12345] will translate into
2418        //   add r0, r2, 12345
2419        //   ldrd r0, r1, [r0]
2420        {
2421          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2422          add(cond, scratch, rn, add_offset);
2423        }
2424        {
2425          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2426          (this->*instruction)(cond,
2427                               rt,
2428                               rt2,
2429                               MemOperand(scratch, load_store_offset));
2430        }
2431        return;
2432      }
2433      case PostIndex:
2434        // Avoid the unpredictable case 'ldrd r0, r1, [r0], imm'
2435        if (!rn.Is(rt) && !rn.Is(rt2)) {
2436          // Post-indexed case:
2437          // ldrd r0, r1, [r2], imm32 will translate into
2438          //   ldrd r0, r1, [r2]
2439          //   movw ip, imm32 & 0xffff
2440          //   movt ip, imm32 >> 16
2441          //   add r2, ip
2442          {
2443            CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2444            (this->*instruction)(cond,
2445                                 rt,
2446                                 rt2,
2447                                 MemOperand(rn, load_store_offset, PostIndex));
2448          }
2449          {
2450            CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2451            add(cond, rn, rn, add_offset);
2452          }
2453          return;
2454        }
2455        break;
2456    }
2457  }
2458  if (operand.IsPlainRegister()) {
2459    const Register& rn = operand.GetBaseRegister();
2460    const Register& rm = operand.GetOffsetRegister();
2461    AddrMode addrmode = operand.GetAddrMode();
2462    switch (addrmode) {
2463      case PreIndex:
2464        // ldrd r0, r1, [r2, r3]! will translate into
2465        //   add r2, r3
2466        //   ldrd r0, r1, [r2]
2467        {
2468          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2469          if (operand.GetSign().IsPlus()) {
2470            add(cond, rn, rn, rm);
2471          } else {
2472            sub(cond, rn, rn, rm);
2473          }
2474        }
2475        {
2476          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2477          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2478        }
2479        return;
2480      case PostIndex:
2481        // ldrd r0, r1, [r2], r3 will translate into
2482        //   ldrd r0, r1, [r2]
2483        //   add r2, r3
2484        {
2485          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2486          (this->*instruction)(cond, rt, rt2, MemOperand(rn, Offset));
2487        }
2488        {
2489          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2490          if (operand.GetSign().IsPlus()) {
2491            add(cond, rn, rn, rm);
2492          } else {
2493            sub(cond, rn, rn, rm);
2494          }
2495        }
2496        return;
2497      case Offset: {
2498        UseScratchRegisterScope temps(this);
2499        // Allow using the destinations as scratch registers if possible.
2500        if (type == kLdrd) {
2501          if (!rt.Is(rn)) temps.Include(rt);
2502          if (!rt2.Is(rn)) temps.Include(rt2);
2503        }
2504        Register scratch = temps.Acquire();
2505        // Offset case:
2506        // ldrd r0, r1, [r2, r3] will translate into
2507        //   add r0, r2, r3
2508        //   ldrd r0, r1, [r0]
2509        {
2510          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2511          if (operand.GetSign().IsPlus()) {
2512            add(cond, scratch, rn, rm);
2513          } else {
2514            sub(cond, scratch, rn, rm);
2515          }
2516        }
2517        {
2518          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2519          (this->*instruction)(cond, rt, rt2, MemOperand(scratch, Offset));
2520        }
2521        return;
2522      }
2523    }
2524  }
2525  Assembler::Delegate(type, instruction, cond, rt, rt2, operand);
2526}
2527
2528
2529void MacroAssembler::Delegate(InstructionType type,
2530                              InstructionCondDtSMop instruction,
2531                              Condition cond,
2532                              DataType dt,
2533                              SRegister rd,
2534                              const MemOperand& operand) {
2535  CONTEXT_SCOPE;
2536  if (operand.IsImmediate()) {
2537    const Register& rn = operand.GetBaseRegister();
2538    AddrMode addrmode = operand.GetAddrMode();
2539    int32_t offset = operand.GetOffsetImmediate();
2540    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2541                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2542    if (rn.IsPC()) {
2543      VIXL_ABORT_WITH_MSG(
2544          "The MacroAssembler does not convert vldr or vstr with a PC base "
2545          "register.\n");
2546    }
2547    switch (addrmode) {
2548      case PreIndex:
2549        // Pre-Indexed case:
2550        // vldr.32 s0, [r1, 12345]! will translate into
2551        //   add r1, 12345
2552        //   vldr.32 s0, [r1]
2553        if (offset != 0) {
2554          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2555          add(cond, rn, rn, offset);
2556        }
2557        {
2558          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2559          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2560        }
2561        return;
2562      case Offset: {
2563        UseScratchRegisterScope temps(this);
2564        Register scratch = temps.Acquire();
2565        // Offset case:
2566        // vldr.32 s0, [r1, 12345] will translate into
2567        //   add ip, r1, 12345
2568        //   vldr.32 s0, [ip]
2569        {
2570          VIXL_ASSERT(offset != 0);
2571          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2572          add(cond, scratch, rn, offset);
2573        }
2574        {
2575          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2576          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2577        }
2578        return;
2579      }
2580      case PostIndex:
2581        // Post-indexed case:
2582        // vldr.32 s0, [r1], imm32 will translate into
2583        //   vldr.32 s0, [r1]
2584        //   movw ip, imm32 & 0xffff
2585        //   movt ip, imm32 >> 16
2586        //   add r1, ip
2587        {
2588          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2589          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2590        }
2591        if (offset != 0) {
2592          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2593          add(cond, rn, rn, offset);
2594        }
2595        return;
2596    }
2597  }
2598  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2599}
2600
2601
2602void MacroAssembler::Delegate(InstructionType type,
2603                              InstructionCondDtDMop instruction,
2604                              Condition cond,
2605                              DataType dt,
2606                              DRegister rd,
2607                              const MemOperand& operand) {
2608  CONTEXT_SCOPE;
2609  if (operand.IsImmediate()) {
2610    const Register& rn = operand.GetBaseRegister();
2611    AddrMode addrmode = operand.GetAddrMode();
2612    int32_t offset = operand.GetOffsetImmediate();
2613    VIXL_ASSERT(((offset > 0) && operand.GetSign().IsPlus()) ||
2614                ((offset < 0) && operand.GetSign().IsMinus()) || (offset == 0));
2615    if (rn.IsPC()) {
2616      VIXL_ABORT_WITH_MSG(
2617          "The MacroAssembler does not convert vldr or vstr with a PC base "
2618          "register.\n");
2619    }
2620    switch (addrmode) {
2621      case PreIndex:
2622        // Pre-Indexed case:
2623        // vldr.64 d0, [r1, 12345]! will translate into
2624        //   add r1, 12345
2625        //   vldr.64 d0, [r1]
2626        if (offset != 0) {
2627          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2628          add(cond, rn, rn, offset);
2629        }
2630        {
2631          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2632          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2633        }
2634        return;
2635      case Offset: {
2636        UseScratchRegisterScope temps(this);
2637        Register scratch = temps.Acquire();
2638        // Offset case:
2639        // vldr.64 d0, [r1, 12345] will translate into
2640        //   add ip, r1, 12345
2641        //   vldr.64 d0, [ip]
2642        {
2643          VIXL_ASSERT(offset != 0);
2644          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2645          add(cond, scratch, rn, offset);
2646        }
2647        {
2648          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2649          (this->*instruction)(cond, dt, rd, MemOperand(scratch, Offset));
2650        }
2651        return;
2652      }
2653      case PostIndex:
2654        // Post-indexed case:
2655        // vldr.64 d0, [r1], imm32 will translate into
2656        //   vldr.64 d0, [r1]
2657        //   movw ip, imm32 & 0xffff
2658        //   movt ip, imm32 >> 16
2659        //   add r1, ip
2660        {
2661          CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2662          (this->*instruction)(cond, dt, rd, MemOperand(rn, Offset));
2663        }
2664        if (offset != 0) {
2665          CodeBufferCheckScope scope(this, 3 * kMaxInstructionSizeInBytes);
2666          add(cond, rn, rn, offset);
2667        }
2668        return;
2669    }
2670  }
2671  Assembler::Delegate(type, instruction, cond, dt, rd, operand);
2672}
2673
2674
2675void MacroAssembler::Delegate(InstructionType type,
2676                              InstructionCondMsrOp instruction,
2677                              Condition cond,
2678                              MaskedSpecialRegister spec_reg,
2679                              const Operand& operand) {
2680  USE(type);
2681  VIXL_ASSERT(type == kMsr);
2682  if (operand.IsImmediate()) {
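        // The immediate form could not be encoded directly, so materialise the
        // value in a scratch register and use the register form of msr.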
2683    UseScratchRegisterScope temps(this);
2684    Register scratch = temps.Acquire();
2685    {
2686      CodeBufferCheckScope scope(this, 2 * kMaxInstructionSizeInBytes);
2687      mov(cond, scratch, operand);
2688    }
2689    CodeBufferCheckScope scope(this, kMaxInstructionSizeInBytes);
2690    msr(cond, spec_reg, scratch);
2691    return;
2692  }
2693  Assembler::Delegate(type, instruction, cond, spec_reg, operand);
2694}
2695
2696
2697void MacroAssembler::Delegate(InstructionType type,
2698                              InstructionCondDtDL instruction,
2699                              Condition cond,
2700                              DataType dt,
2701                              DRegister rd,
2702                              Label* label) {
2703  VIXL_ASSERT(type == kVldr);
2704
2705  CONTEXT_SCOPE;
2706
2707  if (label->IsBound()) {
2708    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2709    UseScratchRegisterScope temps(this);
2710    Register scratch = temps.Acquire();
2711    uint32_t mask = GetOffsetMask(type, Offset);
2712    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2713    return;
2714  }
2715
2716  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2717}
2718
2719
2720void MacroAssembler::Delegate(InstructionType type,
2721                              InstructionCondDtSL instruction,
2722                              Condition cond,
2723                              DataType dt,
2724                              SRegister rd,
2725                              Label* label) {
2726  VIXL_ASSERT(type == kVldr);
2727
2728  CONTEXT_SCOPE;
2729
2730  if (label->IsBound()) {
2731    CodeBufferCheckScope scope(this, 5 * kMaxInstructionSizeInBytes);
2732    UseScratchRegisterScope temps(this);
2733    Register scratch = temps.Acquire();
2734    uint32_t mask = GetOffsetMask(type, Offset);
2735    vldr(dt, rd, MemOperandComputationHelper(cond, scratch, label, mask));
2736    return;
2737  }
2738
2739  Assembler::Delegate(type, instruction, cond, dt, rd, label);
2740}
2741
2742
2743#undef CONTEXT_SCOPE
2744#undef TOSTRING
2745#undef STRINGIFY
2746
2747// Start of generated code.
2748// End of generated code.
2749}  // namespace aarch32
2750}  // namespace vixl
2751